Source snapshot: drivers/gpu/drm/radeon/si.c from linux-2.6-block.git, at commit "drm/radeon: add indirect accessors for UVD CTX registers".
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 #define OLAND_MC_UCODE_SIZE 7863
42
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
53 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
54 MODULE_FIRMWARE("radeon/VERDE_me.bin");
55 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
68
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72 extern void r600_ih_ring_fini(struct radeon_device *rdev);
73 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
74 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
75 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
76 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
77 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
78 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
79
/*
 * Tahiti RLC golden register settings.  Flat array of u32 triplets; by
 * radeon convention each row is (register offset, and-mask, or-value)
 * applied as a masked register write during init.  NOTE(review): the
 * consuming code is not visible in this chunk — confirm the triplet
 * layout against radeon_program_register_sequence().
 */
80 static const u32 tahiti_golden_rlc_registers[] =
81 {
82         0xc424, 0xffffffff, 0x00601005,
83         0xc47c, 0xffffffff, 0x10104040,
84         0xc488, 0xffffffff, 0x0100000a,
85         0xc314, 0xffffffff, 0x00000800,
86         0xc30c, 0xffffffff, 0x800000f4,
87         0xf4a8, 0xffffffff, 0x00000000
88 };
89
/* Tahiti general golden register settings: u32 triplets (offset, mask, value by convention). */
90 static const u32 tahiti_golden_registers[] =
91 {
92         0x9a10, 0x00010000, 0x00018208,
93         0x9830, 0xffffffff, 0x00000000,
94         0x9834, 0xf00fffff, 0x00000400,
95         0x9838, 0x0002021c, 0x00020200,
96         0xc78, 0x00000080, 0x00000000,
97         0xd030, 0x000300c0, 0x00800040,
98         0xd830, 0x000300c0, 0x00800040,
99         0x5bb0, 0x000000f0, 0x00000070,
100         0x5bc0, 0x00200000, 0x50100000,
101         0x7030, 0x31000311, 0x00000011,
102         0x277c, 0x00000003, 0x000007ff,
103         0x240c, 0x000007ff, 0x00000000,
104         0x8a14, 0xf000001f, 0x00000007,
105         0x8b24, 0xffffffff, 0x00ffffff,
106         0x8b10, 0x0000ff0f, 0x00000000,
107         0x28a4c, 0x07ffffff, 0x4e000000,
108         0x28350, 0x3f3f3fff, 0x2a00126a,
109         0x30, 0x000000ff, 0x0040,
110         0x34, 0x00000040, 0x00004040,
111         0x9100, 0x07ffffff, 0x03000000,
112         0x8e88, 0x01ff1f3f, 0x00000000,
113         0x8e84, 0x01ff1f3f, 0x00000000,
114         0x9060, 0x0000007f, 0x00000020,
115         0x9508, 0x00010000, 0x00010000,
116         0xac14, 0x00000200, 0x000002fb,
117         0xac10, 0xffffffff, 0x0000543b,
118         0xac0c, 0xffffffff, 0xa9210876,
119         0x88d0, 0xffffffff, 0x000fff40,
120         0x88d4, 0x0000001f, 0x00000010,
121         0x1410, 0x20000000, 0x20fffed8,
122         0x15c0, 0x000c0fc0, 0x000c0400
123 };
124
/* Second batch of Tahiti golden register triplets (offset, mask, value by convention). */
125 static const u32 tahiti_golden_registers2[] =
126 {
127         0xc64, 0x00000001, 0x00000001
128 };
129
/* Pitcairn RLC golden register settings: u32 triplets (offset, mask, value by convention). */
130 static const u32 pitcairn_golden_rlc_registers[] =
131 {
132         0xc424, 0xffffffff, 0x00601004,
133         0xc47c, 0xffffffff, 0x10102020,
134         0xc488, 0xffffffff, 0x01000020,
135         0xc314, 0xffffffff, 0x00000800,
136         0xc30c, 0xffffffff, 0x800000a4
137 };
138
/* Pitcairn general golden register settings: u32 triplets (offset, mask, value by convention). */
139 static const u32 pitcairn_golden_registers[] =
140 {
141         0x9a10, 0x00010000, 0x00018208,
142         0x9830, 0xffffffff, 0x00000000,
143         0x9834, 0xf00fffff, 0x00000400,
144         0x9838, 0x0002021c, 0x00020200,
145         0xc78, 0x00000080, 0x00000000,
146         0xd030, 0x000300c0, 0x00800040,
147         0xd830, 0x000300c0, 0x00800040,
148         0x5bb0, 0x000000f0, 0x00000070,
149         0x5bc0, 0x00200000, 0x50100000,
150         0x7030, 0x31000311, 0x00000011,
151         0x2ae4, 0x00073ffe, 0x000022a2,
152         0x240c, 0x000007ff, 0x00000000,
153         0x8a14, 0xf000001f, 0x00000007,
154         0x8b24, 0xffffffff, 0x00ffffff,
155         0x8b10, 0x0000ff0f, 0x00000000,
156         0x28a4c, 0x07ffffff, 0x4e000000,
157         0x28350, 0x3f3f3fff, 0x2a00126a,
158         0x30, 0x000000ff, 0x0040,
159         0x34, 0x00000040, 0x00004040,
160         0x9100, 0x07ffffff, 0x03000000,
161         0x9060, 0x0000007f, 0x00000020,
162         0x9508, 0x00010000, 0x00010000,
163         0xac14, 0x000003ff, 0x000000f7,
164         0xac10, 0xffffffff, 0x00000000,
165         0xac0c, 0xffffffff, 0x32761054,
166         0x88d4, 0x0000001f, 0x00000010,
167         0x15c0, 0x000c0fc0, 0x000c0400
168 };
169
/* Verde RLC golden register settings: u32 triplets (offset, mask, value by convention). */
170 static const u32 verde_golden_rlc_registers[] =
171 {
172         0xc424, 0xffffffff, 0x033f1005,
173         0xc47c, 0xffffffff, 0x10808020,
174         0xc488, 0xffffffff, 0x00800008,
175         0xc314, 0xffffffff, 0x00001000,
176         0xc30c, 0xffffffff, 0x80010014
177 };
178
/*
 * Verde general golden register settings: u32 triplets (offset, mask,
 * value by convention).  NOTE(review): many rows appear two or three
 * times verbatim (0xd030, 0xd830, 0x2ae4, 0x240c, 0x8a14, 0x28350,
 * 0x9100, 0x8e88, 0x8e84, 0xac14, 0xac10, 0xac0c, 0x88d4) — redundant
 * for an idempotent masked write, so likely copy/paste; candidate for
 * dedup, left untouched here to preserve the table byte-for-byte.
 */
179 static const u32 verde_golden_registers[] =
180 {
181         0x9a10, 0x00010000, 0x00018208,
182         0x9830, 0xffffffff, 0x00000000,
183         0x9834, 0xf00fffff, 0x00000400,
184         0x9838, 0x0002021c, 0x00020200,
185         0xc78, 0x00000080, 0x00000000,
186         0xd030, 0x000300c0, 0x00800040,
187         0xd030, 0x000300c0, 0x00800040,
188         0xd830, 0x000300c0, 0x00800040,
189         0xd830, 0x000300c0, 0x00800040,
190         0x5bb0, 0x000000f0, 0x00000070,
191         0x5bc0, 0x00200000, 0x50100000,
192         0x7030, 0x31000311, 0x00000011,
193         0x2ae4, 0x00073ffe, 0x000022a2,
194         0x2ae4, 0x00073ffe, 0x000022a2,
195         0x2ae4, 0x00073ffe, 0x000022a2,
196         0x240c, 0x000007ff, 0x00000000,
197         0x240c, 0x000007ff, 0x00000000,
198         0x240c, 0x000007ff, 0x00000000,
199         0x8a14, 0xf000001f, 0x00000007,
200         0x8a14, 0xf000001f, 0x00000007,
201         0x8a14, 0xf000001f, 0x00000007,
202         0x8b24, 0xffffffff, 0x00ffffff,
203         0x8b10, 0x0000ff0f, 0x00000000,
204         0x28a4c, 0x07ffffff, 0x4e000000,
205         0x28350, 0x3f3f3fff, 0x0000124a,
206         0x28350, 0x3f3f3fff, 0x0000124a,
207         0x28350, 0x3f3f3fff, 0x0000124a,
208         0x30, 0x000000ff, 0x0040,
209         0x34, 0x00000040, 0x00004040,
210         0x9100, 0x07ffffff, 0x03000000,
211         0x9100, 0x07ffffff, 0x03000000,
212         0x8e88, 0x01ff1f3f, 0x00000000,
213         0x8e88, 0x01ff1f3f, 0x00000000,
214         0x8e88, 0x01ff1f3f, 0x00000000,
215         0x8e84, 0x01ff1f3f, 0x00000000,
216         0x8e84, 0x01ff1f3f, 0x00000000,
217         0x8e84, 0x01ff1f3f, 0x00000000,
218         0x9060, 0x0000007f, 0x00000020,
219         0x9508, 0x00010000, 0x00010000,
220         0xac14, 0x000003ff, 0x00000003,
221         0xac14, 0x000003ff, 0x00000003,
222         0xac14, 0x000003ff, 0x00000003,
223         0xac10, 0xffffffff, 0x00000000,
224         0xac10, 0xffffffff, 0x00000000,
225         0xac10, 0xffffffff, 0x00000000,
226         0xac0c, 0xffffffff, 0x00001032,
227         0xac0c, 0xffffffff, 0x00001032,
228         0xac0c, 0xffffffff, 0x00001032,
229         0x88d4, 0x0000001f, 0x00000010,
230         0x88d4, 0x0000001f, 0x00000010,
231         0x88d4, 0x0000001f, 0x00000010,
232         0x15c0, 0x000c0fc0, 0x000c0400
233 };
234
/* Oland RLC golden register settings: u32 triplets (offset, mask, value by convention). */
235 static const u32 oland_golden_rlc_registers[] =
236 {
237         0xc424, 0xffffffff, 0x00601005,
238         0xc47c, 0xffffffff, 0x10104040,
239         0xc488, 0xffffffff, 0x0100000a,
240         0xc314, 0xffffffff, 0x00000800,
241         0xc30c, 0xffffffff, 0x800000f4
242 };
243
/* Oland general golden register settings: u32 triplets (offset, mask, value by convention). */
244 static const u32 oland_golden_registers[] =
245 {
246         0x9a10, 0x00010000, 0x00018208,
247         0x9830, 0xffffffff, 0x00000000,
248         0x9834, 0xf00fffff, 0x00000400,
249         0x9838, 0x0002021c, 0x00020200,
250         0xc78, 0x00000080, 0x00000000,
251         0xd030, 0x000300c0, 0x00800040,
252         0xd830, 0x000300c0, 0x00800040,
253         0x5bb0, 0x000000f0, 0x00000070,
254         0x5bc0, 0x00200000, 0x50100000,
255         0x7030, 0x31000311, 0x00000011,
256         0x2ae4, 0x00073ffe, 0x000022a2,
257         0x240c, 0x000007ff, 0x00000000,
258         0x8a14, 0xf000001f, 0x00000007,
259         0x8b24, 0xffffffff, 0x00ffffff,
260         0x8b10, 0x0000ff0f, 0x00000000,
261         0x28a4c, 0x07ffffff, 0x4e000000,
262         0x28350, 0x3f3f3fff, 0x00000082,
263         0x30, 0x000000ff, 0x0040,
264         0x34, 0x00000040, 0x00004040,
265         0x9100, 0x07ffffff, 0x03000000,
266         0x9060, 0x0000007f, 0x00000020,
267         0x9508, 0x00010000, 0x00010000,
268         0xac14, 0x000003ff, 0x000000f3,
269         0xac10, 0xffffffff, 0x00000000,
270         0xac0c, 0xffffffff, 0x00003210,
271         0x88d4, 0x0000001f, 0x00000010,
272         0x15c0, 0x000c0fc0, 0x000c0400
273 };
274
/* Hainan general golden register settings: u32 triplets (offset, mask, value by convention). */
275 static const u32 hainan_golden_registers[] =
276 {
277         0x9a10, 0x00010000, 0x00018208,
278         0x9830, 0xffffffff, 0x00000000,
279         0x9834, 0xf00fffff, 0x00000400,
280         0x9838, 0x0002021c, 0x00020200,
281         0xd0c0, 0xff000fff, 0x00000100,
282         0xd030, 0x000300c0, 0x00800040,
283         0xd8c0, 0xff000fff, 0x00000100,
284         0xd830, 0x000300c0, 0x00800040,
285         0x2ae4, 0x00073ffe, 0x000022a2,
286         0x240c, 0x000007ff, 0x00000000,
287         0x8a14, 0xf000001f, 0x00000007,
288         0x8b24, 0xffffffff, 0x00ffffff,
289         0x8b10, 0x0000ff0f, 0x00000000,
290         0x28a4c, 0x07ffffff, 0x4e000000,
291         0x28350, 0x3f3f3fff, 0x00000000,
292         0x30, 0x000000ff, 0x0040,
293         0x34, 0x00000040, 0x00004040,
294         0x9100, 0x03e00000, 0x03600000,
295         0x9060, 0x0000007f, 0x00000020,
296         0x9508, 0x00010000, 0x00010000,
297         0xac14, 0x000003ff, 0x000000f1,
298         0xac10, 0xffffffff, 0x00000000,
299         0xac0c, 0xffffffff, 0x00003210,
300         0x88d4, 0x0000001f, 0x00000010,
301         0x15c0, 0x000c0fc0, 0x000c0400
302 };
303
/* Second batch of Hainan golden register triplets (offset, mask, value by convention). */
304 static const u32 hainan_golden_registers2[] =
305 {
306         0x98f8, 0xffffffff, 0x02010001
307 };
308
/*
 * Tahiti medium-grain / coarse-grain clock gating (MGCG/CGCG) init
 * sequence: u32 triplets (offset, mask, value by convention).
 */
309 static const u32 tahiti_mgcg_cgcg_init[] =
310 {
311         0xc400, 0xffffffff, 0xfffffffc,
312         0x802c, 0xffffffff, 0xe0000000,
313         0x9a60, 0xffffffff, 0x00000100,
314         0x92a4, 0xffffffff, 0x00000100,
315         0xc164, 0xffffffff, 0x00000100,
316         0x9774, 0xffffffff, 0x00000100,
317         0x8984, 0xffffffff, 0x06000100,
318         0x8a18, 0xffffffff, 0x00000100,
319         0x92a0, 0xffffffff, 0x00000100,
320         0xc380, 0xffffffff, 0x00000100,
321         0x8b28, 0xffffffff, 0x00000100,
322         0x9144, 0xffffffff, 0x00000100,
323         0x8d88, 0xffffffff, 0x00000100,
324         0x8d8c, 0xffffffff, 0x00000100,
325         0x9030, 0xffffffff, 0x00000100,
326         0x9034, 0xffffffff, 0x00000100,
327         0x9038, 0xffffffff, 0x00000100,
328         0x903c, 0xffffffff, 0x00000100,
329         0xad80, 0xffffffff, 0x00000100,
330         0xac54, 0xffffffff, 0x00000100,
331         0x897c, 0xffffffff, 0x06000100,
332         0x9868, 0xffffffff, 0x00000100,
333         0x9510, 0xffffffff, 0x00000100,
334         0xaf04, 0xffffffff, 0x00000100,
335         0xae04, 0xffffffff, 0x00000100,
336         0x949c, 0xffffffff, 0x00000100,
337         0x802c, 0xffffffff, 0xe0000000,
338         0x9160, 0xffffffff, 0x00010000,
339         0x9164, 0xffffffff, 0x00030002,
340         0x9168, 0xffffffff, 0x00040007,
341         0x916c, 0xffffffff, 0x00060005,
342         0x9170, 0xffffffff, 0x00090008,
343         0x9174, 0xffffffff, 0x00020001,
344         0x9178, 0xffffffff, 0x00040003,
345         0x917c, 0xffffffff, 0x00000007,
346         0x9180, 0xffffffff, 0x00060005,
347         0x9184, 0xffffffff, 0x00090008,
348         0x9188, 0xffffffff, 0x00030002,
349         0x918c, 0xffffffff, 0x00050004,
350         0x9190, 0xffffffff, 0x00000008,
351         0x9194, 0xffffffff, 0x00070006,
352         0x9198, 0xffffffff, 0x000a0009,
353         0x919c, 0xffffffff, 0x00040003,
354         0x91a0, 0xffffffff, 0x00060005,
355         0x91a4, 0xffffffff, 0x00000009,
356         0x91a8, 0xffffffff, 0x00080007,
357         0x91ac, 0xffffffff, 0x000b000a,
358         0x91b0, 0xffffffff, 0x00050004,
359         0x91b4, 0xffffffff, 0x00070006,
360         0x91b8, 0xffffffff, 0x0008000b,
361         0x91bc, 0xffffffff, 0x000a0009,
362         0x91c0, 0xffffffff, 0x000d000c,
363         0x91c4, 0xffffffff, 0x00060005,
364         0x91c8, 0xffffffff, 0x00080007,
365         0x91cc, 0xffffffff, 0x0000000b,
366         0x91d0, 0xffffffff, 0x000a0009,
367         0x91d4, 0xffffffff, 0x000d000c,
368         0x91d8, 0xffffffff, 0x00070006,
369         0x91dc, 0xffffffff, 0x00090008,
370         0x91e0, 0xffffffff, 0x0000000c,
371         0x91e4, 0xffffffff, 0x000b000a,
372         0x91e8, 0xffffffff, 0x000e000d,
373         0x91ec, 0xffffffff, 0x00080007,
374         0x91f0, 0xffffffff, 0x000a0009,
375         0x91f4, 0xffffffff, 0x0000000d,
376         0x91f8, 0xffffffff, 0x000c000b,
377         0x91fc, 0xffffffff, 0x000f000e,
378         0x9200, 0xffffffff, 0x00090008,
379         0x9204, 0xffffffff, 0x000b000a,
380         0x9208, 0xffffffff, 0x000c000f,
381         0x920c, 0xffffffff, 0x000e000d,
382         0x9210, 0xffffffff, 0x00110010,
383         0x9214, 0xffffffff, 0x000a0009,
384         0x9218, 0xffffffff, 0x000c000b,
385         0x921c, 0xffffffff, 0x0000000f,
386         0x9220, 0xffffffff, 0x000e000d,
387         0x9224, 0xffffffff, 0x00110010,
388         0x9228, 0xffffffff, 0x000b000a,
389         0x922c, 0xffffffff, 0x000d000c,
390         0x9230, 0xffffffff, 0x00000010,
391         0x9234, 0xffffffff, 0x000f000e,
392         0x9238, 0xffffffff, 0x00120011,
393         0x923c, 0xffffffff, 0x000c000b,
394         0x9240, 0xffffffff, 0x000e000d,
395         0x9244, 0xffffffff, 0x00000011,
396         0x9248, 0xffffffff, 0x0010000f,
397         0x924c, 0xffffffff, 0x00130012,
398         0x9250, 0xffffffff, 0x000d000c,
399         0x9254, 0xffffffff, 0x000f000e,
400         0x9258, 0xffffffff, 0x00100013,
401         0x925c, 0xffffffff, 0x00120011,
402         0x9260, 0xffffffff, 0x00150014,
403         0x9264, 0xffffffff, 0x000e000d,
404         0x9268, 0xffffffff, 0x0010000f,
405         0x926c, 0xffffffff, 0x00000013,
406         0x9270, 0xffffffff, 0x00120011,
407         0x9274, 0xffffffff, 0x00150014,
408         0x9278, 0xffffffff, 0x000f000e,
409         0x927c, 0xffffffff, 0x00110010,
410         0x9280, 0xffffffff, 0x00000014,
411         0x9284, 0xffffffff, 0x00130012,
412         0x9288, 0xffffffff, 0x00160015,
413         0x928c, 0xffffffff, 0x0010000f,
414         0x9290, 0xffffffff, 0x00120011,
415         0x9294, 0xffffffff, 0x00000015,
416         0x9298, 0xffffffff, 0x00140013,
417         0x929c, 0xffffffff, 0x00170016,
418         0x9150, 0xffffffff, 0x96940200,
419         0x8708, 0xffffffff, 0x00900100,
420         0xc478, 0xffffffff, 0x00000080,
421         0xc404, 0xffffffff, 0x0020003f,
422         0x30, 0xffffffff, 0x0000001c,
423         0x34, 0x000f0000, 0x000f0000,
424         0x160c, 0xffffffff, 0x00000100,
425         0x1024, 0xffffffff, 0x00000100,
426         0x102c, 0x00000101, 0x00000000,
427         0x20a8, 0xffffffff, 0x00000104,
428         0x264c, 0x000c0000, 0x000c0000,
429         0x2648, 0x000c0000, 0x000c0000,
430         0x55e4, 0xff000fff, 0x00000100,
431         0x55e8, 0x00000001, 0x00000001,
432         0x2f50, 0x00000001, 0x00000001,
433         0x30cc, 0xc0000fff, 0x00000104,
434         0xc1e4, 0x00000001, 0x00000001,
435         0xd0c0, 0xfffffff0, 0x00000100,
436         0xd8c0, 0xfffffff0, 0x00000100
437 };
438
/*
 * Pitcairn MGCG/CGCG clock gating init sequence: u32 triplets
 * (offset, mask, value by convention).
 */
439 static const u32 pitcairn_mgcg_cgcg_init[] =
440 {
441         0xc400, 0xffffffff, 0xfffffffc,
442         0x802c, 0xffffffff, 0xe0000000,
443         0x9a60, 0xffffffff, 0x00000100,
444         0x92a4, 0xffffffff, 0x00000100,
445         0xc164, 0xffffffff, 0x00000100,
446         0x9774, 0xffffffff, 0x00000100,
447         0x8984, 0xffffffff, 0x06000100,
448         0x8a18, 0xffffffff, 0x00000100,
449         0x92a0, 0xffffffff, 0x00000100,
450         0xc380, 0xffffffff, 0x00000100,
451         0x8b28, 0xffffffff, 0x00000100,
452         0x9144, 0xffffffff, 0x00000100,
453         0x8d88, 0xffffffff, 0x00000100,
454         0x8d8c, 0xffffffff, 0x00000100,
455         0x9030, 0xffffffff, 0x00000100,
456         0x9034, 0xffffffff, 0x00000100,
457         0x9038, 0xffffffff, 0x00000100,
458         0x903c, 0xffffffff, 0x00000100,
459         0xad80, 0xffffffff, 0x00000100,
460         0xac54, 0xffffffff, 0x00000100,
461         0x897c, 0xffffffff, 0x06000100,
462         0x9868, 0xffffffff, 0x00000100,
463         0x9510, 0xffffffff, 0x00000100,
464         0xaf04, 0xffffffff, 0x00000100,
465         0xae04, 0xffffffff, 0x00000100,
466         0x949c, 0xffffffff, 0x00000100,
467         0x802c, 0xffffffff, 0xe0000000,
468         0x9160, 0xffffffff, 0x00010000,
469         0x9164, 0xffffffff, 0x00030002,
470         0x9168, 0xffffffff, 0x00040007,
471         0x916c, 0xffffffff, 0x00060005,
472         0x9170, 0xffffffff, 0x00090008,
473         0x9174, 0xffffffff, 0x00020001,
474         0x9178, 0xffffffff, 0x00040003,
475         0x917c, 0xffffffff, 0x00000007,
476         0x9180, 0xffffffff, 0x00060005,
477         0x9184, 0xffffffff, 0x00090008,
478         0x9188, 0xffffffff, 0x00030002,
479         0x918c, 0xffffffff, 0x00050004,
480         0x9190, 0xffffffff, 0x00000008,
481         0x9194, 0xffffffff, 0x00070006,
482         0x9198, 0xffffffff, 0x000a0009,
483         0x919c, 0xffffffff, 0x00040003,
484         0x91a0, 0xffffffff, 0x00060005,
485         0x91a4, 0xffffffff, 0x00000009,
486         0x91a8, 0xffffffff, 0x00080007,
487         0x91ac, 0xffffffff, 0x000b000a,
488         0x91b0, 0xffffffff, 0x00050004,
489         0x91b4, 0xffffffff, 0x00070006,
490         0x91b8, 0xffffffff, 0x0008000b,
491         0x91bc, 0xffffffff, 0x000a0009,
492         0x91c0, 0xffffffff, 0x000d000c,
493         0x9200, 0xffffffff, 0x00090008,
494         0x9204, 0xffffffff, 0x000b000a,
495         0x9208, 0xffffffff, 0x000c000f,
496         0x920c, 0xffffffff, 0x000e000d,
497         0x9210, 0xffffffff, 0x00110010,
498         0x9214, 0xffffffff, 0x000a0009,
499         0x9218, 0xffffffff, 0x000c000b,
500         0x921c, 0xffffffff, 0x0000000f,
501         0x9220, 0xffffffff, 0x000e000d,
502         0x9224, 0xffffffff, 0x00110010,
503         0x9228, 0xffffffff, 0x000b000a,
504         0x922c, 0xffffffff, 0x000d000c,
505         0x9230, 0xffffffff, 0x00000010,
506         0x9234, 0xffffffff, 0x000f000e,
507         0x9238, 0xffffffff, 0x00120011,
508         0x923c, 0xffffffff, 0x000c000b,
509         0x9240, 0xffffffff, 0x000e000d,
510         0x9244, 0xffffffff, 0x00000011,
511         0x9248, 0xffffffff, 0x0010000f,
512         0x924c, 0xffffffff, 0x00130012,
513         0x9250, 0xffffffff, 0x000d000c,
514         0x9254, 0xffffffff, 0x000f000e,
515         0x9258, 0xffffffff, 0x00100013,
516         0x925c, 0xffffffff, 0x00120011,
517         0x9260, 0xffffffff, 0x00150014,
518         0x9150, 0xffffffff, 0x96940200,
519         0x8708, 0xffffffff, 0x00900100,
520         0xc478, 0xffffffff, 0x00000080,
521         0xc404, 0xffffffff, 0x0020003f,
522         0x30, 0xffffffff, 0x0000001c,
523         0x34, 0x000f0000, 0x000f0000,
524         0x160c, 0xffffffff, 0x00000100,
525         0x1024, 0xffffffff, 0x00000100,
526         0x102c, 0x00000101, 0x00000000,
527         0x20a8, 0xffffffff, 0x00000104,
528         0x55e4, 0xff000fff, 0x00000100,
529         0x55e8, 0x00000001, 0x00000001,
530         0x2f50, 0x00000001, 0x00000001,
531         0x30cc, 0xc0000fff, 0x00000104,
532         0xc1e4, 0x00000001, 0x00000001,
533         0xd0c0, 0xfffffff0, 0x00000100,
534         0xd8c0, 0xfffffff0, 0x00000100
535 };
536
/*
 * Verde MGCG/CGCG clock gating init sequence: u32 triplets
 * (offset, mask, value by convention).
 */
537 static const u32 verde_mgcg_cgcg_init[] =
538 {
539         0xc400, 0xffffffff, 0xfffffffc,
540         0x802c, 0xffffffff, 0xe0000000,
541         0x9a60, 0xffffffff, 0x00000100,
542         0x92a4, 0xffffffff, 0x00000100,
543         0xc164, 0xffffffff, 0x00000100,
544         0x9774, 0xffffffff, 0x00000100,
545         0x8984, 0xffffffff, 0x06000100,
546         0x8a18, 0xffffffff, 0x00000100,
547         0x92a0, 0xffffffff, 0x00000100,
548         0xc380, 0xffffffff, 0x00000100,
549         0x8b28, 0xffffffff, 0x00000100,
550         0x9144, 0xffffffff, 0x00000100,
551         0x8d88, 0xffffffff, 0x00000100,
552         0x8d8c, 0xffffffff, 0x00000100,
553         0x9030, 0xffffffff, 0x00000100,
554         0x9034, 0xffffffff, 0x00000100,
555         0x9038, 0xffffffff, 0x00000100,
556         0x903c, 0xffffffff, 0x00000100,
557         0xad80, 0xffffffff, 0x00000100,
558         0xac54, 0xffffffff, 0x00000100,
559         0x897c, 0xffffffff, 0x06000100,
560         0x9868, 0xffffffff, 0x00000100,
561         0x9510, 0xffffffff, 0x00000100,
562         0xaf04, 0xffffffff, 0x00000100,
563         0xae04, 0xffffffff, 0x00000100,
564         0x949c, 0xffffffff, 0x00000100,
565         0x802c, 0xffffffff, 0xe0000000,
566         0x9160, 0xffffffff, 0x00010000,
567         0x9164, 0xffffffff, 0x00030002,
568         0x9168, 0xffffffff, 0x00040007,
569         0x916c, 0xffffffff, 0x00060005,
570         0x9170, 0xffffffff, 0x00090008,
571         0x9174, 0xffffffff, 0x00020001,
572         0x9178, 0xffffffff, 0x00040003,
573         0x917c, 0xffffffff, 0x00000007,
574         0x9180, 0xffffffff, 0x00060005,
575         0x9184, 0xffffffff, 0x00090008,
576         0x9188, 0xffffffff, 0x00030002,
577         0x918c, 0xffffffff, 0x00050004,
578         0x9190, 0xffffffff, 0x00000008,
579         0x9194, 0xffffffff, 0x00070006,
580         0x9198, 0xffffffff, 0x000a0009,
581         0x919c, 0xffffffff, 0x00040003,
582         0x91a0, 0xffffffff, 0x00060005,
583         0x91a4, 0xffffffff, 0x00000009,
584         0x91a8, 0xffffffff, 0x00080007,
585         0x91ac, 0xffffffff, 0x000b000a,
586         0x91b0, 0xffffffff, 0x00050004,
587         0x91b4, 0xffffffff, 0x00070006,
588         0x91b8, 0xffffffff, 0x0008000b,
589         0x91bc, 0xffffffff, 0x000a0009,
590         0x91c0, 0xffffffff, 0x000d000c,
591         0x9200, 0xffffffff, 0x00090008,
592         0x9204, 0xffffffff, 0x000b000a,
593         0x9208, 0xffffffff, 0x000c000f,
594         0x920c, 0xffffffff, 0x000e000d,
595         0x9210, 0xffffffff, 0x00110010,
596         0x9214, 0xffffffff, 0x000a0009,
597         0x9218, 0xffffffff, 0x000c000b,
598         0x921c, 0xffffffff, 0x0000000f,
599         0x9220, 0xffffffff, 0x000e000d,
600         0x9224, 0xffffffff, 0x00110010,
601         0x9228, 0xffffffff, 0x000b000a,
602         0x922c, 0xffffffff, 0x000d000c,
603         0x9230, 0xffffffff, 0x00000010,
604         0x9234, 0xffffffff, 0x000f000e,
605         0x9238, 0xffffffff, 0x00120011,
606         0x923c, 0xffffffff, 0x000c000b,
607         0x9240, 0xffffffff, 0x000e000d,
608         0x9244, 0xffffffff, 0x00000011,
609         0x9248, 0xffffffff, 0x0010000f,
610         0x924c, 0xffffffff, 0x00130012,
611         0x9250, 0xffffffff, 0x000d000c,
612         0x9254, 0xffffffff, 0x000f000e,
613         0x9258, 0xffffffff, 0x00100013,
614         0x925c, 0xffffffff, 0x00120011,
615         0x9260, 0xffffffff, 0x00150014,
616         0x9150, 0xffffffff, 0x96940200,
617         0x8708, 0xffffffff, 0x00900100,
618         0xc478, 0xffffffff, 0x00000080,
619         0xc404, 0xffffffff, 0x0020003f,
620         0x30, 0xffffffff, 0x0000001c,
621         0x34, 0x000f0000, 0x000f0000,
622         0x160c, 0xffffffff, 0x00000100,
623         0x1024, 0xffffffff, 0x00000100,
624         0x102c, 0x00000101, 0x00000000,
625         0x20a8, 0xffffffff, 0x00000104,
626         0x264c, 0x000c0000, 0x000c0000,
627         0x2648, 0x000c0000, 0x000c0000,
628         0x55e4, 0xff000fff, 0x00000100,
629         0x55e8, 0x00000001, 0x00000001,
630         0x2f50, 0x00000001, 0x00000001,
631         0x30cc, 0xc0000fff, 0x00000104,
632         0xc1e4, 0x00000001, 0x00000001,
633         0xd0c0, 0xfffffff0, 0x00000100,
634         0xd8c0, 0xfffffff0, 0x00000100
635 };
636
/*
 * Oland MGCG/CGCG clock gating init sequence: u32 triplets
 * (offset, mask, value by convention).
 */
637 static const u32 oland_mgcg_cgcg_init[] =
638 {
639         0xc400, 0xffffffff, 0xfffffffc,
640         0x802c, 0xffffffff, 0xe0000000,
641         0x9a60, 0xffffffff, 0x00000100,
642         0x92a4, 0xffffffff, 0x00000100,
643         0xc164, 0xffffffff, 0x00000100,
644         0x9774, 0xffffffff, 0x00000100,
645         0x8984, 0xffffffff, 0x06000100,
646         0x8a18, 0xffffffff, 0x00000100,
647         0x92a0, 0xffffffff, 0x00000100,
648         0xc380, 0xffffffff, 0x00000100,
649         0x8b28, 0xffffffff, 0x00000100,
650         0x9144, 0xffffffff, 0x00000100,
651         0x8d88, 0xffffffff, 0x00000100,
652         0x8d8c, 0xffffffff, 0x00000100,
653         0x9030, 0xffffffff, 0x00000100,
654         0x9034, 0xffffffff, 0x00000100,
655         0x9038, 0xffffffff, 0x00000100,
656         0x903c, 0xffffffff, 0x00000100,
657         0xad80, 0xffffffff, 0x00000100,
658         0xac54, 0xffffffff, 0x00000100,
659         0x897c, 0xffffffff, 0x06000100,
660         0x9868, 0xffffffff, 0x00000100,
661         0x9510, 0xffffffff, 0x00000100,
662         0xaf04, 0xffffffff, 0x00000100,
663         0xae04, 0xffffffff, 0x00000100,
664         0x949c, 0xffffffff, 0x00000100,
665         0x802c, 0xffffffff, 0xe0000000,
666         0x9160, 0xffffffff, 0x00010000,
667         0x9164, 0xffffffff, 0x00030002,
668         0x9168, 0xffffffff, 0x00040007,
669         0x916c, 0xffffffff, 0x00060005,
670         0x9170, 0xffffffff, 0x00090008,
671         0x9174, 0xffffffff, 0x00020001,
672         0x9178, 0xffffffff, 0x00040003,
673         0x917c, 0xffffffff, 0x00000007,
674         0x9180, 0xffffffff, 0x00060005,
675         0x9184, 0xffffffff, 0x00090008,
676         0x9188, 0xffffffff, 0x00030002,
677         0x918c, 0xffffffff, 0x00050004,
678         0x9190, 0xffffffff, 0x00000008,
679         0x9194, 0xffffffff, 0x00070006,
680         0x9198, 0xffffffff, 0x000a0009,
681         0x919c, 0xffffffff, 0x00040003,
682         0x91a0, 0xffffffff, 0x00060005,
683         0x91a4, 0xffffffff, 0x00000009,
684         0x91a8, 0xffffffff, 0x00080007,
685         0x91ac, 0xffffffff, 0x000b000a,
686         0x91b0, 0xffffffff, 0x00050004,
687         0x91b4, 0xffffffff, 0x00070006,
688         0x91b8, 0xffffffff, 0x0008000b,
689         0x91bc, 0xffffffff, 0x000a0009,
690         0x91c0, 0xffffffff, 0x000d000c,
691         0x91c4, 0xffffffff, 0x00060005,
692         0x91c8, 0xffffffff, 0x00080007,
693         0x91cc, 0xffffffff, 0x0000000b,
694         0x91d0, 0xffffffff, 0x000a0009,
695         0x91d4, 0xffffffff, 0x000d000c,
696         0x9150, 0xffffffff, 0x96940200,
697         0x8708, 0xffffffff, 0x00900100,
698         0xc478, 0xffffffff, 0x00000080,
699         0xc404, 0xffffffff, 0x0020003f,
700         0x30, 0xffffffff, 0x0000001c,
701         0x34, 0x000f0000, 0x000f0000,
702         0x160c, 0xffffffff, 0x00000100,
703         0x1024, 0xffffffff, 0x00000100,
704         0x102c, 0x00000101, 0x00000000,
705         0x20a8, 0xffffffff, 0x00000104,
706         0x264c, 0x000c0000, 0x000c0000,
707         0x2648, 0x000c0000, 0x000c0000,
708         0x55e4, 0xff000fff, 0x00000100,
709         0x55e8, 0x00000001, 0x00000001,
710         0x2f50, 0x00000001, 0x00000001,
711         0x30cc, 0xc0000fff, 0x00000104,
712         0xc1e4, 0x00000001, 0x00000001,
713         0xd0c0, 0xfffffff0, 0x00000100,
714         0xd8c0, 0xfffffff0, 0x00000100
715 };
716
/*
 * Hainan MGCG/CGCG clock gating init sequence: u32 triplets
 * (offset, mask, value by convention).  Slightly shorter than the
 * other SI variants' lists (e.g. no 0x102c / 0x55e4 / 0x55e8 rows).
 */
717 static const u32 hainan_mgcg_cgcg_init[] =
718 {
719         0xc400, 0xffffffff, 0xfffffffc,
720         0x802c, 0xffffffff, 0xe0000000,
721         0x9a60, 0xffffffff, 0x00000100,
722         0x92a4, 0xffffffff, 0x00000100,
723         0xc164, 0xffffffff, 0x00000100,
724         0x9774, 0xffffffff, 0x00000100,
725         0x8984, 0xffffffff, 0x06000100,
726         0x8a18, 0xffffffff, 0x00000100,
727         0x92a0, 0xffffffff, 0x00000100,
728         0xc380, 0xffffffff, 0x00000100,
729         0x8b28, 0xffffffff, 0x00000100,
730         0x9144, 0xffffffff, 0x00000100,
731         0x8d88, 0xffffffff, 0x00000100,
732         0x8d8c, 0xffffffff, 0x00000100,
733         0x9030, 0xffffffff, 0x00000100,
734         0x9034, 0xffffffff, 0x00000100,
735         0x9038, 0xffffffff, 0x00000100,
736         0x903c, 0xffffffff, 0x00000100,
737         0xad80, 0xffffffff, 0x00000100,
738         0xac54, 0xffffffff, 0x00000100,
739         0x897c, 0xffffffff, 0x06000100,
740         0x9868, 0xffffffff, 0x00000100,
741         0x9510, 0xffffffff, 0x00000100,
742         0xaf04, 0xffffffff, 0x00000100,
743         0xae04, 0xffffffff, 0x00000100,
744         0x949c, 0xffffffff, 0x00000100,
745         0x802c, 0xffffffff, 0xe0000000,
746         0x9160, 0xffffffff, 0x00010000,
747         0x9164, 0xffffffff, 0x00030002,
748         0x9168, 0xffffffff, 0x00040007,
749         0x916c, 0xffffffff, 0x00060005,
750         0x9170, 0xffffffff, 0x00090008,
751         0x9174, 0xffffffff, 0x00020001,
752         0x9178, 0xffffffff, 0x00040003,
753         0x917c, 0xffffffff, 0x00000007,
754         0x9180, 0xffffffff, 0x00060005,
755         0x9184, 0xffffffff, 0x00090008,
756         0x9188, 0xffffffff, 0x00030002,
757         0x918c, 0xffffffff, 0x00050004,
758         0x9190, 0xffffffff, 0x00000008,
759         0x9194, 0xffffffff, 0x00070006,
760         0x9198, 0xffffffff, 0x000a0009,
761         0x919c, 0xffffffff, 0x00040003,
762         0x91a0, 0xffffffff, 0x00060005,
763         0x91a4, 0xffffffff, 0x00000009,
764         0x91a8, 0xffffffff, 0x00080007,
765         0x91ac, 0xffffffff, 0x000b000a,
766         0x91b0, 0xffffffff, 0x00050004,
767         0x91b4, 0xffffffff, 0x00070006,
768         0x91b8, 0xffffffff, 0x0008000b,
769         0x91bc, 0xffffffff, 0x000a0009,
770         0x91c0, 0xffffffff, 0x000d000c,
771         0x91c4, 0xffffffff, 0x00060005,
772         0x91c8, 0xffffffff, 0x00080007,
773         0x91cc, 0xffffffff, 0x0000000b,
774         0x91d0, 0xffffffff, 0x000a0009,
775         0x91d4, 0xffffffff, 0x000d000c,
776         0x9150, 0xffffffff, 0x96940200,
777         0x8708, 0xffffffff, 0x00900100,
778         0xc478, 0xffffffff, 0x00000080,
779         0xc404, 0xffffffff, 0x0020003f,
780         0x30, 0xffffffff, 0x0000001c,
781         0x34, 0x000f0000, 0x000f0000,
782         0x160c, 0xffffffff, 0x00000100,
783         0x1024, 0xffffffff, 0x00000100,
784         0x20a8, 0xffffffff, 0x00000104,
785         0x264c, 0x000c0000, 0x000c0000,
786         0x2648, 0x000c0000, 0x000c0000,
787         0x2f50, 0x00000001, 0x00000001,
788         0x30cc, 0xc0000fff, 0x00000104,
789         0xc1e4, 0x00000001, 0x00000001,
790         0xd0c0, 0xfffffff0, 0x00000100,
791         0xd8c0, 0xfffffff0, 0x00000100
792 };
793
794 static u32 verde_pg_init[] =
795 {
796         0x353c, 0xffffffff, 0x40000,
797         0x3538, 0xffffffff, 0x200010ff,
798         0x353c, 0xffffffff, 0x0,
799         0x353c, 0xffffffff, 0x0,
800         0x353c, 0xffffffff, 0x0,
801         0x353c, 0xffffffff, 0x0,
802         0x353c, 0xffffffff, 0x0,
803         0x353c, 0xffffffff, 0x7007,
804         0x3538, 0xffffffff, 0x300010ff,
805         0x353c, 0xffffffff, 0x0,
806         0x353c, 0xffffffff, 0x0,
807         0x353c, 0xffffffff, 0x0,
808         0x353c, 0xffffffff, 0x0,
809         0x353c, 0xffffffff, 0x0,
810         0x353c, 0xffffffff, 0x400000,
811         0x3538, 0xffffffff, 0x100010ff,
812         0x353c, 0xffffffff, 0x0,
813         0x353c, 0xffffffff, 0x0,
814         0x353c, 0xffffffff, 0x0,
815         0x353c, 0xffffffff, 0x0,
816         0x353c, 0xffffffff, 0x0,
817         0x353c, 0xffffffff, 0x120200,
818         0x3538, 0xffffffff, 0x500010ff,
819         0x353c, 0xffffffff, 0x0,
820         0x353c, 0xffffffff, 0x0,
821         0x353c, 0xffffffff, 0x0,
822         0x353c, 0xffffffff, 0x0,
823         0x353c, 0xffffffff, 0x0,
824         0x353c, 0xffffffff, 0x1e1e16,
825         0x3538, 0xffffffff, 0x600010ff,
826         0x353c, 0xffffffff, 0x0,
827         0x353c, 0xffffffff, 0x0,
828         0x353c, 0xffffffff, 0x0,
829         0x353c, 0xffffffff, 0x0,
830         0x353c, 0xffffffff, 0x0,
831         0x353c, 0xffffffff, 0x171f1e,
832         0x3538, 0xffffffff, 0x700010ff,
833         0x353c, 0xffffffff, 0x0,
834         0x353c, 0xffffffff, 0x0,
835         0x353c, 0xffffffff, 0x0,
836         0x353c, 0xffffffff, 0x0,
837         0x353c, 0xffffffff, 0x0,
838         0x353c, 0xffffffff, 0x0,
839         0x3538, 0xffffffff, 0x9ff,
840         0x3500, 0xffffffff, 0x0,
841         0x3504, 0xffffffff, 0x10000800,
842         0x3504, 0xffffffff, 0xf,
843         0x3504, 0xffffffff, 0xf,
844         0x3500, 0xffffffff, 0x4,
845         0x3504, 0xffffffff, 0x1000051e,
846         0x3504, 0xffffffff, 0xffff,
847         0x3504, 0xffffffff, 0xffff,
848         0x3500, 0xffffffff, 0x8,
849         0x3504, 0xffffffff, 0x80500,
850         0x3500, 0xffffffff, 0x12,
851         0x3504, 0xffffffff, 0x9050c,
852         0x3500, 0xffffffff, 0x1d,
853         0x3504, 0xffffffff, 0xb052c,
854         0x3500, 0xffffffff, 0x2a,
855         0x3504, 0xffffffff, 0x1053e,
856         0x3500, 0xffffffff, 0x2d,
857         0x3504, 0xffffffff, 0x10546,
858         0x3500, 0xffffffff, 0x30,
859         0x3504, 0xffffffff, 0xa054e,
860         0x3500, 0xffffffff, 0x3c,
861         0x3504, 0xffffffff, 0x1055f,
862         0x3500, 0xffffffff, 0x3f,
863         0x3504, 0xffffffff, 0x10567,
864         0x3500, 0xffffffff, 0x42,
865         0x3504, 0xffffffff, 0x1056f,
866         0x3500, 0xffffffff, 0x45,
867         0x3504, 0xffffffff, 0x10572,
868         0x3500, 0xffffffff, 0x48,
869         0x3504, 0xffffffff, 0x20575,
870         0x3500, 0xffffffff, 0x4c,
871         0x3504, 0xffffffff, 0x190801,
872         0x3500, 0xffffffff, 0x67,
873         0x3504, 0xffffffff, 0x1082a,
874         0x3500, 0xffffffff, 0x6a,
875         0x3504, 0xffffffff, 0x1b082d,
876         0x3500, 0xffffffff, 0x87,
877         0x3504, 0xffffffff, 0x310851,
878         0x3500, 0xffffffff, 0xba,
879         0x3504, 0xffffffff, 0x891,
880         0x3500, 0xffffffff, 0xbc,
881         0x3504, 0xffffffff, 0x893,
882         0x3500, 0xffffffff, 0xbe,
883         0x3504, 0xffffffff, 0x20895,
884         0x3500, 0xffffffff, 0xc2,
885         0x3504, 0xffffffff, 0x20899,
886         0x3500, 0xffffffff, 0xc6,
887         0x3504, 0xffffffff, 0x2089d,
888         0x3500, 0xffffffff, 0xca,
889         0x3504, 0xffffffff, 0x8a1,
890         0x3500, 0xffffffff, 0xcc,
891         0x3504, 0xffffffff, 0x8a3,
892         0x3500, 0xffffffff, 0xce,
893         0x3504, 0xffffffff, 0x308a5,
894         0x3500, 0xffffffff, 0xd3,
895         0x3504, 0xffffffff, 0x6d08cd,
896         0x3500, 0xffffffff, 0x142,
897         0x3504, 0xffffffff, 0x2000095a,
898         0x3504, 0xffffffff, 0x1,
899         0x3500, 0xffffffff, 0x144,
900         0x3504, 0xffffffff, 0x301f095b,
901         0x3500, 0xffffffff, 0x165,
902         0x3504, 0xffffffff, 0xc094d,
903         0x3500, 0xffffffff, 0x173,
904         0x3504, 0xffffffff, 0xf096d,
905         0x3500, 0xffffffff, 0x184,
906         0x3504, 0xffffffff, 0x15097f,
907         0x3500, 0xffffffff, 0x19b,
908         0x3504, 0xffffffff, 0xc0998,
909         0x3500, 0xffffffff, 0x1a9,
910         0x3504, 0xffffffff, 0x409a7,
911         0x3500, 0xffffffff, 0x1af,
912         0x3504, 0xffffffff, 0xcdc,
913         0x3500, 0xffffffff, 0x1b1,
914         0x3504, 0xffffffff, 0x800,
915         0x3508, 0xffffffff, 0x6c9b2000,
916         0x3510, 0xfc00, 0x2000,
917         0x3544, 0xffffffff, 0xfc0,
918         0x28d4, 0x00000100, 0x100
919 };
920
921 static void si_init_golden_registers(struct radeon_device *rdev)
922 {
923         switch (rdev->family) {
924         case CHIP_TAHITI:
925                 radeon_program_register_sequence(rdev,
926                                                  tahiti_golden_registers,
927                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
928                 radeon_program_register_sequence(rdev,
929                                                  tahiti_golden_rlc_registers,
930                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
931                 radeon_program_register_sequence(rdev,
932                                                  tahiti_mgcg_cgcg_init,
933                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
934                 radeon_program_register_sequence(rdev,
935                                                  tahiti_golden_registers2,
936                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
937                 break;
938         case CHIP_PITCAIRN:
939                 radeon_program_register_sequence(rdev,
940                                                  pitcairn_golden_registers,
941                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
942                 radeon_program_register_sequence(rdev,
943                                                  pitcairn_golden_rlc_registers,
944                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
945                 radeon_program_register_sequence(rdev,
946                                                  pitcairn_mgcg_cgcg_init,
947                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
948                 break;
949         case CHIP_VERDE:
950                 radeon_program_register_sequence(rdev,
951                                                  verde_golden_registers,
952                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
953                 radeon_program_register_sequence(rdev,
954                                                  verde_golden_rlc_registers,
955                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
956                 radeon_program_register_sequence(rdev,
957                                                  verde_mgcg_cgcg_init,
958                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
959                 radeon_program_register_sequence(rdev,
960                                                  verde_pg_init,
961                                                  (const u32)ARRAY_SIZE(verde_pg_init));
962                 break;
963         case CHIP_OLAND:
964                 radeon_program_register_sequence(rdev,
965                                                  oland_golden_registers,
966                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
967                 radeon_program_register_sequence(rdev,
968                                                  oland_golden_rlc_registers,
969                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
970                 radeon_program_register_sequence(rdev,
971                                                  oland_mgcg_cgcg_init,
972                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
973                 break;
974         case CHIP_HAINAN:
975                 radeon_program_register_sequence(rdev,
976                                                  hainan_golden_registers,
977                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
978                 radeon_program_register_sequence(rdev,
979                                                  hainan_golden_registers2,
980                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
981                 radeon_program_register_sequence(rdev,
982                                                  hainan_mgcg_cgcg_init,
983                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
984                 break;
985         default:
986                 break;
987         }
988 }
989
990 #define PCIE_BUS_CLK                10000
991 #define TCLK                        (PCIE_BUS_CLK / 10)
992
993 /**
994  * si_get_xclk - get the xclk
995  *
996  * @rdev: radeon_device pointer
997  *
998  * Returns the reference clock used by the gfx engine
999  * (SI).
1000  */
1001 u32 si_get_xclk(struct radeon_device *rdev)
1002 {
1003         u32 reference_clock = rdev->clock.spll.reference_freq;
1004         u32 tmp;
1005
1006         tmp = RREG32(CG_CLKPIN_CNTL_2);
1007         if (tmp & MUX_TCLK_TO_XCLK)
1008                 return TCLK;
1009
1010         tmp = RREG32(CG_CLKPIN_CNTL);
1011         if (tmp & XTALIN_DIVIDE)
1012                 return reference_clock / 4;
1013
1014         return reference_clock;
1015 }
1016
1017 /* get temperature in millidegrees */
1018 int si_get_temp(struct radeon_device *rdev)
1019 {
1020         u32 temp;
1021         int actual_temp = 0;
1022
1023         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1024                 CTF_TEMP_SHIFT;
1025
1026         if (temp & 0x200)
1027                 actual_temp = 255;
1028         else
1029                 actual_temp = temp & 0x1ff;
1030
1031         actual_temp = (actual_temp * 1000);
1032
1033         return actual_temp;
1034 }
1035
1036 #define TAHITI_IO_MC_REGS_SIZE 36
1037
/* MC (memory controller) IO debug { index, data } pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode() before the MC
 * ucode is loaded (Tahiti).
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1076
/* MC IO debug { index, data } pairs for Pitcairn; consumed by
 * si_mc_load_microcode().  Differs from the Tahiti table only in the
 * final 0x9f entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1115
/* MC IO debug { index, data } pairs for Verde; consumed by
 * si_mc_load_microcode().  Differs from the Tahiti table only in the
 * final 0x9f entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1154
/* MC IO debug { index, data } pairs for Oland; consumed by
 * si_mc_load_microcode().  Differs from the Tahiti table only in the
 * final 0x9f entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1193
/* MC IO debug { index, data } pairs for Hainan; consumed by
 * si_mc_load_microcode().  Differs from the Tahiti table only in the
 * final 0x9f entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1232
1233 /* ucode loading */
1234 static int si_mc_load_microcode(struct radeon_device *rdev)
1235 {
1236         const __be32 *fw_data;
1237         u32 running, blackout = 0;
1238         u32 *io_mc_regs;
1239         int i, ucode_size, regs_size;
1240
1241         if (!rdev->mc_fw)
1242                 return -EINVAL;
1243
1244         switch (rdev->family) {
1245         case CHIP_TAHITI:
1246                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1247                 ucode_size = SI_MC_UCODE_SIZE;
1248                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1249                 break;
1250         case CHIP_PITCAIRN:
1251                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1252                 ucode_size = SI_MC_UCODE_SIZE;
1253                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1254                 break;
1255         case CHIP_VERDE:
1256         default:
1257                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1258                 ucode_size = SI_MC_UCODE_SIZE;
1259                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1260                 break;
1261         case CHIP_OLAND:
1262                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1263                 ucode_size = OLAND_MC_UCODE_SIZE;
1264                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1265                 break;
1266         case CHIP_HAINAN:
1267                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1268                 ucode_size = OLAND_MC_UCODE_SIZE;
1269                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1270                 break;
1271         }
1272
1273         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1274
1275         if (running == 0) {
1276                 if (running) {
1277                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1278                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1279                 }
1280
1281                 /* reset the engine and set to writable */
1282                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1283                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1284
1285                 /* load mc io regs */
1286                 for (i = 0; i < regs_size; i++) {
1287                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1288                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1289                 }
1290                 /* load the MC ucode */
1291                 fw_data = (const __be32 *)rdev->mc_fw->data;
1292                 for (i = 0; i < ucode_size; i++)
1293                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1294
1295                 /* put the engine back into the active state */
1296                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1297                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1298                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1299
1300                 /* wait for training to complete */
1301                 for (i = 0; i < rdev->usec_timeout; i++) {
1302                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1303                                 break;
1304                         udelay(1);
1305                 }
1306                 for (i = 0; i < rdev->usec_timeout; i++) {
1307                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1308                                 break;
1309                         udelay(1);
1310                 }
1311
1312                 if (running)
1313                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1314         }
1315
1316         return 0;
1317 }
1318
1319 static int si_init_microcode(struct radeon_device *rdev)
1320 {
1321         struct platform_device *pdev;
1322         const char *chip_name;
1323         const char *rlc_chip_name;
1324         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1325         char fw_name[30];
1326         int err;
1327
1328         DRM_DEBUG("\n");
1329
1330         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1331         err = IS_ERR(pdev);
1332         if (err) {
1333                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1334                 return -EINVAL;
1335         }
1336
1337         switch (rdev->family) {
1338         case CHIP_TAHITI:
1339                 chip_name = "TAHITI";
1340                 rlc_chip_name = "TAHITI";
1341                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1342                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1343                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1344                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1345                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1346                 break;
1347         case CHIP_PITCAIRN:
1348                 chip_name = "PITCAIRN";
1349                 rlc_chip_name = "PITCAIRN";
1350                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1351                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1352                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1353                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1354                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1355                 break;
1356         case CHIP_VERDE:
1357                 chip_name = "VERDE";
1358                 rlc_chip_name = "VERDE";
1359                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1360                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1361                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1362                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1363                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1364                 break;
1365         case CHIP_OLAND:
1366                 chip_name = "OLAND";
1367                 rlc_chip_name = "OLAND";
1368                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1369                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1370                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1371                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1372                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1373                 break;
1374         case CHIP_HAINAN:
1375                 chip_name = "HAINAN";
1376                 rlc_chip_name = "HAINAN";
1377                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1378                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1379                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1380                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1381                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1382                 break;
1383         default: BUG();
1384         }
1385
1386         DRM_INFO("Loading %s Microcode\n", chip_name);
1387
1388         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1389         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1390         if (err)
1391                 goto out;
1392         if (rdev->pfp_fw->size != pfp_req_size) {
1393                 printk(KERN_ERR
1394                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1395                        rdev->pfp_fw->size, fw_name);
1396                 err = -EINVAL;
1397                 goto out;
1398         }
1399
1400         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1401         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1402         if (err)
1403                 goto out;
1404         if (rdev->me_fw->size != me_req_size) {
1405                 printk(KERN_ERR
1406                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1407                        rdev->me_fw->size, fw_name);
1408                 err = -EINVAL;
1409         }
1410
1411         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1412         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1413         if (err)
1414                 goto out;
1415         if (rdev->ce_fw->size != ce_req_size) {
1416                 printk(KERN_ERR
1417                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1418                        rdev->ce_fw->size, fw_name);
1419                 err = -EINVAL;
1420         }
1421
1422         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1423         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1424         if (err)
1425                 goto out;
1426         if (rdev->rlc_fw->size != rlc_req_size) {
1427                 printk(KERN_ERR
1428                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1429                        rdev->rlc_fw->size, fw_name);
1430                 err = -EINVAL;
1431         }
1432
1433         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1434         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1435         if (err)
1436                 goto out;
1437         if (rdev->mc_fw->size != mc_req_size) {
1438                 printk(KERN_ERR
1439                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1440                        rdev->mc_fw->size, fw_name);
1441                 err = -EINVAL;
1442         }
1443
1444 out:
1445         platform_device_unregister(pdev);
1446
1447         if (err) {
1448                 if (err != -EINVAL)
1449                         printk(KERN_ERR
1450                                "si_cp: Failed to load firmware \"%s\"\n",
1451                                fw_name);
1452                 release_firmware(rdev->pfp_fw);
1453                 rdev->pfp_fw = NULL;
1454                 release_firmware(rdev->me_fw);
1455                 rdev->me_fw = NULL;
1456                 release_firmware(rdev->ce_fw);
1457                 rdev->ce_fw = NULL;
1458                 release_firmware(rdev->rlc_fw);
1459                 rdev->rlc_fw = NULL;
1460                 release_firmware(rdev->mc_fw);
1461                 rdev->mc_fw = NULL;
1462         }
1463         return err;
1464 }
1465
1466 /* watermark setup */
1467 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1468                                    struct radeon_crtc *radeon_crtc,
1469                                    struct drm_display_mode *mode,
1470                                    struct drm_display_mode *other_mode)
1471 {
1472         u32 tmp;
1473         /*
1474          * Line Buffer Setup
1475          * There are 3 line buffers, each one shared by 2 display controllers.
1476          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1477          * the display controllers.  The paritioning is done via one of four
1478          * preset allocations specified in bits 21:20:
1479          *  0 - half lb
1480          *  2 - whole lb, other crtc must be disabled
1481          */
1482         /* this can get tricky if we have two large displays on a paired group
1483          * of crtcs.  Ideally for multiple large displays we'd assign them to
1484          * non-linked crtcs for maximum line buffer allocation.
1485          */
1486         if (radeon_crtc->base.enabled && mode) {
1487                 if (other_mode)
1488                         tmp = 0; /* 1/2 */
1489                 else
1490                         tmp = 2; /* whole */
1491         } else
1492                 tmp = 0;
1493
1494         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1495                DC_LB_MEMORY_CONFIG(tmp));
1496
1497         if (radeon_crtc->base.enabled && mode) {
1498                 switch (tmp) {
1499                 case 0:
1500                 default:
1501                         return 4096 * 2;
1502                 case 2:
1503                         return 8192 * 2;
1504                 }
1505         }
1506
1507         /* controller not enabled, so no lb used */
1508         return 0;
1509 }
1510
1511 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1512 {
1513         u32 tmp = RREG32(MC_SHARED_CHMAP);
1514
1515         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1516         case 0:
1517         default:
1518                 return 1;
1519         case 1:
1520                 return 2;
1521         case 2:
1522                 return 4;
1523         case 3:
1524                 return 8;
1525         case 4:
1526                 return 3;
1527         case 5:
1528                 return 6;
1529         case 6:
1530                 return 10;
1531         case 7:
1532                 return 12;
1533         case 8:
1534                 return 16;
1535         }
1536 }
1537
/* Input parameters for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1553
1554 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1555 {
1556         /* Calculate raw DRAM Bandwidth */
1557         fixed20_12 dram_efficiency; /* 0.7 */
1558         fixed20_12 yclk, dram_channels, bandwidth;
1559         fixed20_12 a;
1560
1561         a.full = dfixed_const(1000);
1562         yclk.full = dfixed_const(wm->yclk);
1563         yclk.full = dfixed_div(yclk, a);
1564         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1565         a.full = dfixed_const(10);
1566         dram_efficiency.full = dfixed_const(7);
1567         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1568         bandwidth.full = dfixed_mul(dram_channels, yclk);
1569         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1570
1571         return dfixed_trunc(bandwidth);
1572 }
1573
1574 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1575 {
1576         /* Calculate DRAM Bandwidth and the part allocated to display. */
1577         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1578         fixed20_12 yclk, dram_channels, bandwidth;
1579         fixed20_12 a;
1580
1581         a.full = dfixed_const(1000);
1582         yclk.full = dfixed_const(wm->yclk);
1583         yclk.full = dfixed_div(yclk, a);
1584         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1585         a.full = dfixed_const(10);
1586         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1587         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1588         bandwidth.full = dfixed_mul(dram_channels, yclk);
1589         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1590
1591         return dfixed_trunc(bandwidth);
1592 }
1593
1594 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1595 {
1596         /* Calculate the display Data return Bandwidth */
1597         fixed20_12 return_efficiency; /* 0.8 */
1598         fixed20_12 sclk, bandwidth;
1599         fixed20_12 a;
1600
1601         a.full = dfixed_const(1000);
1602         sclk.full = dfixed_const(wm->sclk);
1603         sclk.full = dfixed_div(sclk, a);
1604         a.full = dfixed_const(10);
1605         return_efficiency.full = dfixed_const(8);
1606         return_efficiency.full = dfixed_div(return_efficiency, a);
1607         a.full = dfixed_const(32);
1608         bandwidth.full = dfixed_mul(a, sclk);
1609         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1610
1611         return dfixed_trunc(bandwidth);
1612 }
1613
1614 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1615 {
1616         return 32;
1617 }
1618
1619 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1620 {
1621         /* Calculate the DMIF Request Bandwidth */
1622         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1623         fixed20_12 disp_clk, sclk, bandwidth;
1624         fixed20_12 a, b1, b2;
1625         u32 min_bandwidth;
1626
1627         a.full = dfixed_const(1000);
1628         disp_clk.full = dfixed_const(wm->disp_clk);
1629         disp_clk.full = dfixed_div(disp_clk, a);
1630         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1631         b1.full = dfixed_mul(a, disp_clk);
1632
1633         a.full = dfixed_const(1000);
1634         sclk.full = dfixed_const(wm->sclk);
1635         sclk.full = dfixed_div(sclk, a);
1636         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1637         b2.full = dfixed_mul(a, sclk);
1638
1639         a.full = dfixed_const(10);
1640         disp_clk_request_efficiency.full = dfixed_const(8);
1641         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1642
1643         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1644
1645         a.full = dfixed_const(min_bandwidth);
1646         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1647
1648         return dfixed_trunc(bandwidth);
1649 }
1650
1651 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1652 {
1653         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1654         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1655         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1656         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1657
1658         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1659 }
1660
1661 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1662 {
1663         /* Calculate the display mode Average Bandwidth
1664          * DisplayMode should contain the source and destination dimensions,
1665          * timing, etc.
1666          */
1667         fixed20_12 bpp;
1668         fixed20_12 line_time;
1669         fixed20_12 src_width;
1670         fixed20_12 bandwidth;
1671         fixed20_12 a;
1672
1673         a.full = dfixed_const(1000);
1674         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1675         line_time.full = dfixed_div(line_time, a);
1676         bpp.full = dfixed_const(wm->bytes_per_pixel);
1677         src_width.full = dfixed_const(wm->src_width);
1678         bandwidth.full = dfixed_mul(src_width, bpp);
1679         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1680         bandwidth.full = dfixed_div(bandwidth, line_time);
1681
1682         return dfixed_trunc(bandwidth);
1683 }
1684
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time for a 512-byte x 8 chunk request to return, in ns */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* worst-case time spent servicing requests from the other heads */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* more source lines are fetched per output line when downscaling
	 * steeply, using >= 3 scaler taps, or deinterlacing
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / (mc latency expressed in display clocks),
	 * i.e. the rate at which the DMIF buffer can absorb data
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk (MHz) * bytes_per_pixel: max line buffer fill rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source pixels
	 * at lb_fill_bw
	 */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line cannot be filled within the active period, the
	 * overrun adds directly to the latency the watermark must cover
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1747
1748 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1749 {
1750         if (dce6_average_bandwidth(wm) <=
1751             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1752                 return true;
1753         else
1754                 return false;
1755 };
1756
1757 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1758 {
1759         if (dce6_average_bandwidth(wm) <=
1760             (dce6_available_bandwidth(wm) / wm->num_heads))
1761                 return true;
1762         else
1763                 return false;
1764 };
1765
1766 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1767 {
1768         u32 lb_partitions = wm->lb_size / wm->src_width;
1769         u32 line_time = wm->active_time + wm->blank_time;
1770         u32 latency_tolerant_lines;
1771         u32 latency_hiding;
1772         fixed20_12 a;
1773
1774         a.full = dfixed_const(1);
1775         if (wm->vsc.full > a.full)
1776                 latency_tolerant_lines = 1;
1777         else {
1778                 if (lb_partitions <= (wm->vtaps + 1))
1779                         latency_tolerant_lines = 1;
1780                 else
1781                         latency_tolerant_lines = 2;
1782         }
1783
1784         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1785
1786         if (dce6_latency_watermark(wm) <= latency_hiding)
1787                 return true;
1788         else
1789                 return false;
1790 }
1791
/* dce6_program_watermarks - program display watermarks for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer size allocated to this crtc's pipe
 * @num_heads: total number of active display heads
 *
 * Builds two watermark parameter sets (high and low clock cases),
 * computes the latency watermarks and priority marks from them, and
 * writes the results to the crtc's DPG arbitration/latency registers
 * and PRIORITY_A/B_CNT registers.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* ARUBA (TN) uses the DCE5-style dram channel config */
		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A:
		 * watermark_a * pixel clock (MHz) * hsc / 1000 / 16
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
1947
1948 void dce6_bandwidth_update(struct radeon_device *rdev)
1949 {
1950         struct drm_display_mode *mode0 = NULL;
1951         struct drm_display_mode *mode1 = NULL;
1952         u32 num_heads = 0, lb_size;
1953         int i;
1954
1955         radeon_update_display_priority(rdev);
1956
1957         for (i = 0; i < rdev->num_crtc; i++) {
1958                 if (rdev->mode_info.crtcs[i]->base.enabled)
1959                         num_heads++;
1960         }
1961         for (i = 0; i < rdev->num_crtc; i += 2) {
1962                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1963                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1964                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1965                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1966                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1967                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1968         }
1969 }
1970
1971 /*
1972  * Core functions
1973  */
1974 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1975 {
1976         const u32 num_tile_mode_states = 32;
1977         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1978
1979         switch (rdev->config.si.mem_row_size_in_kb) {
1980         case 1:
1981                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982                 break;
1983         case 2:
1984         default:
1985                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986                 break;
1987         case 4:
1988                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989                 break;
1990         }
1991
1992         if ((rdev->family == CHIP_TAHITI) ||
1993             (rdev->family == CHIP_PITCAIRN)) {
1994                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1995                         switch (reg_offset) {
1996                         case 0:  /* non-AA compressed depth or any compressed stencil */
1997                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1999                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2000                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2001                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2002                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2003                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2004                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2005                                 break;
2006                         case 1:  /* 2xAA/4xAA compressed depth only */
2007                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2008                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2009                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2010                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2011                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2012                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2013                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2014                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2015                                 break;
2016                         case 2:  /* 8xAA compressed depth only */
2017                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2018                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2019                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2020                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2021                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2022                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2023                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2024                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2025                                 break;
2026                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2027                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2028                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2029                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2030                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2031                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2032                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2033                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2034                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2035                                 break;
2036                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2037                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2038                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2039                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2040                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2041                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2042                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2043                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2044                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2045                                 break;
2046                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2047                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2049                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2050                                                  TILE_SPLIT(split_equal_to_row_size) |
2051                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2052                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2053                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2054                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2055                                 break;
2056                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2057                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2058                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2059                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2060                                                  TILE_SPLIT(split_equal_to_row_size) |
2061                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2062                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2063                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2064                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2065                                 break;
2066                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2067                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2069                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2070                                                  TILE_SPLIT(split_equal_to_row_size) |
2071                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2072                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2073                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2074                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2075                                 break;
2076                         case 8:  /* 1D and 1D Array Surfaces */
2077                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2078                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2079                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2080                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2081                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2082                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2083                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2084                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2085                                 break;
2086                         case 9:  /* Displayable maps. */
2087                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2088                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2089                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2090                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2091                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2092                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2093                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2094                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2095                                 break;
2096                         case 10:  /* Display 8bpp. */
2097                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2098                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2099                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2100                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2101                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2102                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2103                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2104                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2105                                 break;
2106                         case 11:  /* Display 16bpp. */
2107                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2108                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2109                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2110                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2111                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2112                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2113                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2114                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2115                                 break;
2116                         case 12:  /* Display 32bpp. */
2117                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2119                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2120                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2121                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2122                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2123                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2124                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2125                                 break;
2126                         case 13:  /* Thin. */
2127                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2128                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2129                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2130                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2131                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2132                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2133                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2134                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2135                                 break;
2136                         case 14:  /* Thin 8 bpp. */
2137                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2138                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2139                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2140                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2141                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2142                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2143                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2144                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2145                                 break;
2146                         case 15:  /* Thin 16 bpp. */
2147                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2149                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2150                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2151                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2152                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2154                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2155                                 break;
2156                         case 16:  /* Thin 32 bpp. */
2157                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2159                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2160                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2161                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2162                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2164                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2165                                 break;
2166                         case 17:  /* Thin 64 bpp. */
2167                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2168                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2169                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2170                                                  TILE_SPLIT(split_equal_to_row_size) |
2171                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2172                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2173                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2174                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2175                                 break;
2176                         case 21:  /* 8 bpp PRT. */
2177                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2178                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2179                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2180                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2181                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2182                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2183                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2184                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2185                                 break;
2186                         case 22:  /* 16 bpp PRT */
2187                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2188                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2189                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2190                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2191                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2192                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2193                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2194                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2195                                 break;
2196                         case 23:  /* 32 bpp PRT */
2197                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2198                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2199                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2200                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2201                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2202                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2204                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2205                                 break;
2206                         case 24:  /* 64 bpp PRT */
2207                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2208                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2209                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2210                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2211                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2212                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2213                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2214                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2215                                 break;
2216                         case 25:  /* 128 bpp PRT */
2217                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2218                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2219                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2220                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2221                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2222                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2223                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2224                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2225                                 break;
2226                         default:
2227                                 gb_tile_moden = 0;
2228                                 break;
2229                         }
2230                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2231                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2232                 }
2233         } else if ((rdev->family == CHIP_VERDE) ||
2234                    (rdev->family == CHIP_OLAND) ||
2235                    (rdev->family == CHIP_HAINAN)) {
2236                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2237                         switch (reg_offset) {
2238                         case 0:  /* non-AA compressed depth or any compressed stencil */
2239                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2241                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2242                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2243                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2244                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2245                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2246                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2247                                 break;
2248                         case 1:  /* 2xAA/4xAA compressed depth only */
2249                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2251                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2252                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2253                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2254                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2257                                 break;
2258                         case 2:  /* 8xAA compressed depth only */
2259                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2261                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2262                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2263                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2264                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2267                                 break;
2268                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2269                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2271                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2272                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2273                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2274                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2276                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2277                                 break;
2278                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2279                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2280                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2281                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2282                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2283                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2284                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2286                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2287                                 break;
2288                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2291                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2292                                                  TILE_SPLIT(split_equal_to_row_size) |
2293                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2294                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2296                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2297                                 break;
2298                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2299                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2301                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2302                                                  TILE_SPLIT(split_equal_to_row_size) |
2303                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2304                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2307                                 break;
2308                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2309                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2311                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2312                                                  TILE_SPLIT(split_equal_to_row_size) |
2313                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2314                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2316                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2317                                 break;
2318                         case 8:  /* 1D and 1D Array Surfaces */
2319                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2320                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2321                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2322                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2323                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2324                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2326                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2327                                 break;
2328                         case 9:  /* Displayable maps. */
2329                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2330                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2332                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2333                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2334                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2336                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2337                                 break;
2338                         case 10:  /* Display 8bpp. */
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2342                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2343                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2344                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2346                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2347                                 break;
2348                         case 11:  /* Display 16bpp. */
2349                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2352                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2353                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2354                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2356                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2357                                 break;
2358                         case 12:  /* Display 32bpp. */
2359                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2362                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2363                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2364                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2367                                 break;
2368                         case 13:  /* Thin. */
2369                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2370                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2371                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2372                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2373                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2374                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2377                                 break;
2378                         case 14:  /* Thin 8 bpp. */
2379                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2381                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2382                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2383                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2384                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2387                                 break;
2388                         case 15:  /* Thin 16 bpp. */
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2391                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2392                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2393                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2394                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2396                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2397                                 break;
2398                         case 16:  /* Thin 32 bpp. */
2399                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2401                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2402                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2403                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2404                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2407                                 break;
2408                         case 17:  /* Thin 64 bpp. */
2409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2411                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2412                                                  TILE_SPLIT(split_equal_to_row_size) |
2413                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2414                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2416                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2417                                 break;
2418                         case 21:  /* 8 bpp PRT. */
2419                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2421                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2422                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2423                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2424                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2425                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2426                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2427                                 break;
2428                         case 22:  /* 16 bpp PRT */
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2431                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2433                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2434                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2436                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2437                                 break;
2438                         case 23:  /* 32 bpp PRT */
2439                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2441                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2443                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2444                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2447                                 break;
2448                         case 24:  /* 64 bpp PRT */
2449                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2451                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2453                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2454                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2457                                 break;
2458                         case 25:  /* 128 bpp PRT */
2459                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2461                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2463                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2464                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2467                                 break;
2468                         default:
2469                                 gb_tile_moden = 0;
2470                                 break;
2471                         }
2472                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2473                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2474                 }
2475         } else
2476                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2477 }
2478
2479 static void si_select_se_sh(struct radeon_device *rdev,
2480                             u32 se_num, u32 sh_num)
2481 {
2482         u32 data = INSTANCE_BROADCAST_WRITES;
2483
2484         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2485                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2486         else if (se_num == 0xffffffff)
2487                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2488         else if (sh_num == 0xffffffff)
2489                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2490         else
2491                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2492         WREG32(GRBM_GFX_INDEX, data);
2493 }
2494
2495 static u32 si_create_bitmask(u32 bit_width)
2496 {
2497         u32 i, mask = 0;
2498
2499         for (i = 0; i < bit_width; i++) {
2500                 mask <<= 1;
2501                 mask |= 1;
2502         }
2503         return mask;
2504 }
2505
2506 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2507 {
2508         u32 data, mask;
2509
2510         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2511         if (data & 1)
2512                 data &= INACTIVE_CUS_MASK;
2513         else
2514                 data = 0;
2515         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2516
2517         data >>= INACTIVE_CUS_SHIFT;
2518
2519         mask = si_create_bitmask(cu_per_sh);
2520
2521         return ~data & mask;
2522 }
2523
2524 static void si_setup_spi(struct radeon_device *rdev,
2525                          u32 se_num, u32 sh_per_se,
2526                          u32 cu_per_sh)
2527 {
2528         int i, j, k;
2529         u32 data, mask, active_cu;
2530
2531         for (i = 0; i < se_num; i++) {
2532                 for (j = 0; j < sh_per_se; j++) {
2533                         si_select_se_sh(rdev, i, j);
2534                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2535                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2536
2537                         mask = 1;
2538                         for (k = 0; k < 16; k++) {
2539                                 mask <<= k;
2540                                 if (active_cu & mask) {
2541                                         data &= ~mask;
2542                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2543                                         break;
2544                                 }
2545                         }
2546                 }
2547         }
2548         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2549 }
2550
/**
 * si_get_rb_disabled - mask of disabled RBs in the currently selected SE/SH
 * @rdev: radeon_device pointer
 * @max_rb_num: total number of render backends
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per SE
 *
 * Combines the harvested backends from CC_RB_BACKEND_DISABLE (bit 0
 * appears to act as a valid flag — the field is only honoured when it
 * is set) with the user-disabled backends from
 * GC_USER_RB_BACKEND_DISABLE.  The result is limited to the number of
 * RBs per SH (max_rb_num / se_num / sh_per_se).  Reads the SE/SH
 * selected by a prior si_select_se_sh() call.
 */
static u32 si_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num, u32 se_num,
			      u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	/* move the backend-disable field down to bit 0 */
	data >>= BACKEND_DISABLE_SHIFT;

	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
2570
/**
 * si_setup_rb - program the raster configuration for the enabled RBs
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per SE
 * @max_rb_num: total number of render backends
 *
 * Gathers the disabled-RB mask of every SE/SH pair, derives the set of
 * enabled RBs and writes a matching PA_SC_RASTER_CONFIG RB mapping for
 * each SE.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect each SE/SH's disabled mask into one packed bitmap */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a set bit in enabled_rbs means the RB is usable */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	for (i = 0; i < se_num; i++) {
		/* broadcast to all SHs of this SE */
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		/* consume two enabled-RB bits per SH to pick an RB_MAP code */
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2618
/**
 * si_gpu_init - bring up the SI GFX core
 * @rdev: radeon_device pointer
 *
 * Sets the per-family shader/backend limits, initializes HDP, derives
 * and programs the tiling/address configuration, configures the RBs
 * and SPI, and programs a set of 3D-engine register defaults.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family topology limits and "golden" address config */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	/* NOTE(review): raw register offsets (0x2c14.., stride 0x18) with no
	 * symbolic names visible here — presumably HDP nonsurface registers. */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read but not used below — TODO confirm the read
	 * is not load-bearing before removing it */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the number of columns, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* program the address config into every block that consumes it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification; presumably just re-latches
	 * the register — TODO confirm */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the hw settle after programming */
	udelay(50);
}
2875
2876 /*
2877  * GPU scratch registers helpers function.
2878  */
2879 static void si_scratch_init(struct radeon_device *rdev)
2880 {
2881         int i;
2882
2883         rdev->scratch.num_reg = 7;
2884         rdev->scratch.reg_base = SCRATCH_REG0;
2885         for (i = 0; i < rdev->scratch.num_reg; i++) {
2886                 rdev->scratch.free[i] = true;
2887                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2888         }
2889 }
2890
/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the read caches over the GART, then emits an EVENT_WRITE_EOP
 * packet that writes the fence sequence number to the fence address and
 * raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* full surface sync */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
2917
2918 /*
2919  * IB stuff
2920  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to execute
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet pointing at the IB, optionally updating the ring's rptr
 * shadow first, and flushes the read caches for the IB's VM id
 * afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* next_rptr offset = packets emitted below
			 * (3 here + 4 for the IB packet + 8 for the flush)
			 * — assumes those counts stay in sync with the code */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* shadow the rptr into the writeback buffer instead */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write to memory */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* vm id in bits 31:24 of the size dword, 0 = no VM */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* full surface sync */
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
2977
2978 /*
2979  * CP.
2980  */
2981 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2982 {
2983         if (enable)
2984                 WREG32(CP_ME_CNTL, 0);
2985         else {
2986                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2987                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2988                 WREG32(SCRATCH_UMSK, 0);
2989                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2990                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2991                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2992         }
2993         udelay(50);
2994 }
2995
/**
 * si_cp_load_microcode - load the PFP, CE and ME microcode
 * @rdev: radeon_device pointer
 *
 * Halts the CP, then streams each big-endian firmware image into the
 * corresponding ucode RAM (resetting the write address before and
 * after each upload), and finally resets all address registers.
 *
 * Returns 0 on success, -EINVAL if the ME or PFP firmware is missing.
 * NOTE(review): rdev->ce_fw is dereferenced but not checked alongside
 * me_fw/pfp_fw — presumably guaranteed non-NULL by the firmware-load
 * path; confirm.
 */
static int si_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	si_cp_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* reset all ucode address registers */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3033
3034 static int si_cp_start(struct radeon_device *rdev)
3035 {
3036         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3037         int r, i;
3038
3039         r = radeon_ring_lock(rdev, ring, 7 + 4);
3040         if (r) {
3041                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3042                 return r;
3043         }
3044         /* init the CP */
3045         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3046         radeon_ring_write(ring, 0x1);
3047         radeon_ring_write(ring, 0x0);
3048         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3049         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3050         radeon_ring_write(ring, 0);
3051         radeon_ring_write(ring, 0);
3052
3053         /* init the CE partitions */
3054         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3055         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3056         radeon_ring_write(ring, 0xc000);
3057         radeon_ring_write(ring, 0xe000);
3058         radeon_ring_unlock_commit(rdev, ring);
3059
3060         si_cp_enable(rdev, true);
3061
3062         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3063         if (r) {
3064                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3065                 return r;
3066         }
3067
3068         /* setup clear context state */
3069         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3070         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3071
3072         for (i = 0; i < si_default_size; i++)
3073                 radeon_ring_write(ring, si_default_state[i]);
3074
3075         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3076         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3077
3078         /* set clear context state */
3079         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3080         radeon_ring_write(ring, 0);
3081
3082         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3083         radeon_ring_write(ring, 0x00000316);
3084         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3085         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3086
3087         radeon_ring_unlock_commit(rdev, ring);
3088
3089         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3090                 ring = &rdev->ring[i];
3091                 r = radeon_ring_lock(rdev, ring, 2);
3092
3093                 /* clear the compute context state */
3094                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3095                 radeon_ring_write(ring, 0);
3096
3097                 radeon_ring_unlock_commit(rdev, ring);
3098         }
3099
3100         return 0;
3101 }
3102
3103 static void si_cp_fini(struct radeon_device *rdev)
3104 {
3105         struct radeon_ring *ring;
3106         si_cp_enable(rdev, false);
3107
3108         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3109         radeon_ring_fini(rdev, ring);
3110         radeon_scratch_free(rdev, ring->rptr_save_reg);
3111
3112         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3113         radeon_ring_fini(rdev, ring);
3114         radeon_scratch_free(rdev, ring->rptr_save_reg);
3115
3116         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3117         radeon_ring_fini(rdev, ring);
3118         radeon_scratch_free(rdev, ring->rptr_save_reg);
3119 }
3120
/**
 * si_cp_resume - reset and (re)start the CP ring buffers
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP (and the PA/SH/VGT/SPI/SX blocks that must be
 * reset with it), programs the ring-buffer size, pointers, base and
 * writeback addresses for the gfx ring and both compute rings, starts
 * the rings via si_cp_start() and ring-tests each one.
 *
 * Returns 0 on success (compute-ring test failures only mark that ring
 * not ready), negative error code if the gfx ring test fails.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	/* gfx ring failure is fatal */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures just disable the affected ring */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3256
/**
 * si_gpu_check_soft_reset - determine which blocks need a soft reset
 * @rdev: radeon_device pointer
 *
 * Samples the GRBM, SRBM, DMA, display and VM-L2 status registers and
 * translates busy/pending bits into a RADEON_RESET_* bitmask.  An MC
 * busy indication is deliberately dropped (MC is most likely busy, not
 * hung).
 *
 * Returns the mask of blocks to reset (0 means the GPU looks idle).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3337
/**
 * si_gpu_soft_reset - soft reset the GPU blocks named in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags (from si_gpu_check_soft_reset())
 *
 * Halts the CP and DMA engines, stops the MC, then pulses the GRBM and
 * SRBM soft-reset registers for the requested blocks.  The statement
 * order (halt -> MC stop -> reset pulse -> MC resume) is required by
 * the hardware; do not reorder.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0: stop the ring buffer before resetting the engine */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1: stop the ring buffer before resetting the engine */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* flags into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse GRBM reset: set bits, read back to post the write, wait,
	 * then clear.  The extra reads are required to flush the write.
	 */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse SRBM reset the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3462
3463 int si_asic_reset(struct radeon_device *rdev)
3464 {
3465         u32 reset_mask;
3466
3467         reset_mask = si_gpu_check_soft_reset(rdev);
3468
3469         if (reset_mask)
3470                 r600_set_bios_scratch_engine_hung(rdev, true);
3471
3472         si_gpu_soft_reset(rdev, reset_mask);
3473
3474         reset_mask = si_gpu_check_soft_reset(rdev);
3475
3476         if (!reset_mask)
3477                 r600_set_bios_scratch_engine_hung(rdev, false);
3478
3479         return 0;
3480 }
3481
3482 /**
3483  * si_gfx_is_lockup - Check if the GFX engine is locked up
3484  *
3485  * @rdev: radeon_device pointer
3486  * @ring: radeon_ring structure holding ring information
3487  *
3488  * Check if the GFX engine is locked up.
3489  * Returns true if the engine appears to be locked up, false if not.
3490  */
3491 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3492 {
3493         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3494
3495         if (!(reset_mask & (RADEON_RESET_GFX |
3496                             RADEON_RESET_COMPUTE |
3497                             RADEON_RESET_CP))) {
3498                 radeon_ring_lockup_update(ring);
3499                 return false;
3500         }
3501         /* force CP activities */
3502         radeon_ring_force_activity(rdev, ring);
3503         return radeon_ring_test_lockup(rdev, ring);
3504 }
3505
3506 /**
3507  * si_dma_is_lockup - Check if the DMA engine is locked up
3508  *
3509  * @rdev: radeon_device pointer
3510  * @ring: radeon_ring structure holding ring information
3511  *
3512  * Check if the async DMA engine is locked up.
3513  * Returns true if the engine appears to be locked up, false if not.
3514  */
3515 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3516 {
3517         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3518         u32 mask;
3519
3520         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3521                 mask = RADEON_RESET_DMA;
3522         else
3523                 mask = RADEON_RESET_DMA1;
3524
3525         if (!(reset_mask & mask)) {
3526                 radeon_ring_lockup_update(ring);
3527                 return false;
3528         }
3529         /* force ring activities */
3530         radeon_ring_force_activity(rdev, ring);
3531         return radeon_ring_test_lockup(rdev, ring);
3532 }
3533
3534 /* MC */
/**
 * si_mc_program - program the GPU memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Set up the VRAM framebuffer location and the system/AGP apertures in
 * the GPU's address space.  The MC (and display, via evergreen_mc_stop)
 * is quiesced around the update so no client accesses VRAM while the
 * FB location changes.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP registers for all 32 surfaces (0x18-byte stride) */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs the 16MB-aligned end (hi 16 bits) and start */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (top below bottom) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3585
3586 void si_vram_gtt_location(struct radeon_device *rdev,
3587                           struct radeon_mc *mc)
3588 {
3589         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3590                 /* leave room for at least 1024M GTT */
3591                 dev_warn(rdev->dev, "limiting VRAM\n");
3592                 mc->real_vram_size = 0xFFC0000000ULL;
3593                 mc->mc_vram_size = 0xFFC0000000ULL;
3594         }
3595         radeon_vram_location(rdev, &rdev->mc, 0);
3596         rdev->mc.gtt_base_align = 0;
3597         radeon_gtt_location(rdev, mc);
3598 }
3599
3600 static int si_mc_init(struct radeon_device *rdev)
3601 {
3602         u32 tmp;
3603         int chansize, numchan;
3604
3605         /* Get VRAM informations */
3606         rdev->mc.vram_is_ddr = true;
3607         tmp = RREG32(MC_ARB_RAMCFG);
3608         if (tmp & CHANSIZE_OVERRIDE) {
3609                 chansize = 16;
3610         } else if (tmp & CHANSIZE_MASK) {
3611                 chansize = 64;
3612         } else {
3613                 chansize = 32;
3614         }
3615         tmp = RREG32(MC_SHARED_CHMAP);
3616         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3617         case 0:
3618         default:
3619                 numchan = 1;
3620                 break;
3621         case 1:
3622                 numchan = 2;
3623                 break;
3624         case 2:
3625                 numchan = 4;
3626                 break;
3627         case 3:
3628                 numchan = 8;
3629                 break;
3630         case 4:
3631                 numchan = 3;
3632                 break;
3633         case 5:
3634                 numchan = 6;
3635                 break;
3636         case 6:
3637                 numchan = 10;
3638                 break;
3639         case 7:
3640                 numchan = 12;
3641                 break;
3642         case 8:
3643                 numchan = 16;
3644                 break;
3645         }
3646         rdev->mc.vram_width = numchan * chansize;
3647         /* Could aper size report 0 ? */
3648         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3649         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3650         /* size in MB on si */
3651         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3652         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3653         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3654         si_vram_gtt_location(rdev, &rdev->mc);
3655         radeon_update_bandwidth_info(rdev);
3656
3657         return 0;
3658 }
3659
3660 /*
3661  * GART
3662  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP write cache so page table updates reach memory, then
 * invalidate the TLBs for all 16 VM contexts.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3671
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * configures VM context 0 to cover the GTT aperture and contexts 1-15
 * for per-process VMs, then flushes the TLBs.  Returns 0 on success or
 * a negative error code if the table cannot be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: flat mapping of the GTT aperture */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3756
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, drops the L1 TLB and L2 cache enables
 * (leaving pass-through for unmapped system-aperture accesses), and
 * unpins the page table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
3775
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, then frees the page table and the
 * common GART state.  The order matters: hardware must be disabled
 * before the table memory is released.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
3782
3783 /* vm parser */
/**
 * si_vm_reg_valid - check if a register may be written from a VM IB
 *
 * @reg: register byte offset
 *
 * Whitelist check used by the command-stream checker: context
 * registers (>= 0x28000) are always allowed, plus an explicit list of
 * harmless config registers.  Anything else is rejected with an error.
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
3824
/**
 * si_vm_packet3_ce_check - validate a PACKET3 on the CE (const engine)
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: parsed packet header
 *
 * The constant engine may only issue a small set of opcodes; none of
 * them take register destinations, so a whitelist of opcodes suffices.
 * Returns 0 if allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3846
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: parsed packet header (pkt->idx points at the header dword)
 *
 * Opcodes that cannot touch registers are whitelisted outright; the
 * rest have any register destinations extracted from the packet body
 * and checked against si_vm_reg_valid().  Returns 0 if the packet is
 * allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		/* always allowed; no register destinations to check */
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;	/* dword index -> byte offset */
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field 0 means register destination(s) */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* WR_ONE_ADDR: all data goes to one register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential: one register per data dword */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8: write-space is register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1: destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the window, then validate each register */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					/* SAIC: no address increment, single reg */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					/* NOTE(review): low 21 bits of command look
					 * like a byte count, but the loop strides a
					 * register (4 bytes) per count — confirm the
					 * intended units; as written the check is
					 * conservative (covers a superset). */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					/* DAIC: no address increment, single reg */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4005
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: parsed packet header (pkt->idx points at the header dword)
 *
 * Compute-ring variant of si_vm_packet3_gfx_check(): a smaller opcode
 * whitelist (no draw packets, no SET_CONFIG_REG, no CP_DMA), with the
 * same register-destination checks for COPY_DATA, WRITE_DATA,
 * COND_WRITE and COPY_DW.  Returns 0 if allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		/* always allowed; no register destinations to check */
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;	/* dword index -> byte offset */
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field 0 means register destination(s) */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* WR_ONE_ADDR: all data goes to one register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential: one register per data dword */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8: write-space is register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1: destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4087
4088 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4089 {
4090         int ret = 0;
4091         u32 idx = 0;
4092         struct radeon_cs_packet pkt;
4093
4094         do {
4095                 pkt.idx = idx;
4096                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4097                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4098                 pkt.one_reg_wr = 0;
4099                 switch (pkt.type) {
4100                 case RADEON_PACKET_TYPE0:
4101                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4102                         ret = -EINVAL;
4103                         break;
4104                 case RADEON_PACKET_TYPE2:
4105                         idx += 1;
4106                         break;
4107                 case RADEON_PACKET_TYPE3:
4108                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4109                         if (ib->is_const_ib)
4110                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4111                         else {
4112                                 switch (ib->ring) {
4113                                 case RADEON_RING_TYPE_GFX_INDEX:
4114                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4115                                         break;
4116                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4117                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4118                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4119                                         break;
4120                                 default:
4121                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4122                                         ret = -EINVAL;
4123                                         break;
4124                                 }
4125                         }
4126                         idx += pkt.count + 2;
4127                         break;
4128                 default:
4129                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4130                         ret = -EINVAL;
4131                         break;
4132                 }
4133                 if (ret)
4134                         break;
4135         } while (idx < ib->length_dw);
4136
4137         return ret;
4138 }
4139
4140 /*
4141  * vm
4142  */
4143 int si_vm_init(struct radeon_device *rdev)
4144 {
4145         /* number of VMs */
4146         rdev->vm_manager.nvm = 16;
4147         /* base offset of vram pages */
4148         rdev->vm_manager.vram_base_offset = 0;
4149
4150         return 0;
4151 }
4152
/**
 * si_vm_fini - tear down the VM manager
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do: si_vm_init() allocates no resources.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4156
4157 /**
4158  * si_vm_set_page - update the page tables using the CP
4159  *
4160  * @rdev: radeon_device pointer
4161  * @ib: indirect buffer to fill with commands
4162  * @pe: addr of the page entry
4163  * @addr: dst addr to write into pe
4164  * @count: number of page entries to update
4165  * @incr: increase next addr by incr bytes
4166  * @flags: access flags
4167  *
4168  * Update the page tables using the CP (SI).
4169  */
4170 void si_vm_set_page(struct radeon_device *rdev,
4171                     struct radeon_ib *ib,
4172                     uint64_t pe,
4173                     uint64_t addr, unsigned count,
4174                     uint32_t incr, uint32_t flags)
4175 {
4176         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4177         uint64_t value;
4178         unsigned ndw;
4179
4180         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4181                 while (count) {
4182                         ndw = 2 + count * 2;
4183                         if (ndw > 0x3FFE)
4184                                 ndw = 0x3FFE;
4185
4186                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4187                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4188                                         WRITE_DATA_DST_SEL(1));
4189                         ib->ptr[ib->length_dw++] = pe;
4190                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4191                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4192                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4193                                         value = radeon_vm_map_gart(rdev, addr);
4194                                         value &= 0xFFFFFFFFFFFFF000ULL;
4195                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4196                                         value = addr;
4197                                 } else {
4198                                         value = 0;
4199                                 }
4200                                 addr += incr;
4201                                 value |= r600_flags;
4202                                 ib->ptr[ib->length_dw++] = value;
4203                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4204                         }
4205                 }
4206         } else {
4207                 /* DMA */
4208                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4209                         while (count) {
4210                                 ndw = count * 2;
4211                                 if (ndw > 0xFFFFE)
4212                                         ndw = 0xFFFFE;
4213
4214                                 /* for non-physically contiguous pages (system) */
4215                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4216                                 ib->ptr[ib->length_dw++] = pe;
4217                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4218                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4219                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4220                                                 value = radeon_vm_map_gart(rdev, addr);
4221                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4222                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4223                                                 value = addr;
4224                                         } else {
4225                                                 value = 0;
4226                                         }
4227                                         addr += incr;
4228                                         value |= r600_flags;
4229                                         ib->ptr[ib->length_dw++] = value;
4230                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4231                                 }
4232                         }
4233                 } else {
4234                         while (count) {
4235                                 ndw = count * 2;
4236                                 if (ndw > 0xFFFFE)
4237                                         ndw = 0xFFFFE;
4238
4239                                 if (flags & RADEON_VM_PAGE_VALID)
4240                                         value = addr;
4241                                 else
4242                                         value = 0;
4243                                 /* for physically contiguous pages (vram) */
4244                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4245                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4246                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4247                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4248                                 ib->ptr[ib->length_dw++] = 0;
4249                                 ib->ptr[ib->length_dw++] = value; /* value */
4250                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4251                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4252                                 ib->ptr[ib->length_dw++] = 0;
4253                                 pe += ndw * 4;
4254                                 addr += (ndw / 2) * incr;
4255                                 count -= ndw / 2;
4256                         }
4257                 }
4258                 while (ib->length_dw & 0x7)
4259                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4260         }
4261 }
4262
/**
 * si_vm_flush - flush the TLB for a VM via a CP ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring to emit the flush on
 * @vm: vm to flush, or NULL (no-op)
 *
 * Update the page directory base address for @vm's context, flush
 * the HDP cache, and invalidate the VM context's TLB.  The three
 * WRITE_DATA packets must stay in this order.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	/* base address is programmed in units of 4k pages */
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4305
/**
 * si_dma_vm_flush - flush the TLB for a VM via a DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring to emit the flush on
 * @vm: vm to flush, or NULL (no-op)
 *
 * Same sequence as si_vm_flush() but using SRBM_WRITE DMA packets:
 * program the page directory base, flush the HDP cache, then
 * invalidate the context's TLB.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* 0xf << 16 is the SRBM byte-enable mask; contexts 0-7 and 8-15
	 * live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	/* NOTE(review): vm->id can reach 15; the comment above looks
	 * stale — the write below requests whichever single context
	 * vm->id names */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4331
4332 /*
4333  * RLC
4334  */
/**
 * si_rlc_fini - free the RLC buffer objects
 *
 * @rdev: radeon_device pointer
 *
 * Unpin and free the RLC save/restore and clear-state buffer objects
 * if they exist.  Each BO must be reserved before unpin and
 * unreserved before the final unref — that ordering is the BO
 * locking protocol and must not change.
 */
void si_rlc_fini(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj) {
		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
		/* unpin even if the reserve warned; best effort teardown */
		radeon_bo_unpin(rdev->rlc.save_restore_obj);
		radeon_bo_unreserve(rdev->rlc.save_restore_obj);

		radeon_bo_unref(&rdev->rlc.save_restore_obj);
		rdev->rlc.save_restore_obj = NULL;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj) {
		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
		radeon_bo_unpin(rdev->rlc.clear_state_obj);
		radeon_bo_unreserve(rdev->rlc.clear_state_obj);

		radeon_bo_unref(&rdev->rlc.clear_state_obj);
		rdev->rlc.clear_state_obj = NULL;
	}
}
4363
/**
 * si_rlc_init - allocate and pin the RLC buffer objects
 *
 * @rdev: radeon_device pointer
 *
 * Create (if needed) and pin in VRAM the RLC save/restore and
 * clear-state buffer objects, recording their GPU addresses.
 * Any failure unwinds everything already set up via si_rlc_fini().
 *
 * Returns 0 for success, negative error code on failure.
 */
int si_rlc_init(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.save_restore_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	/* pin in VRAM and remember the GPU address for the RLC registers */
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.save_restore_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.clear_state_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
			si_rlc_fini(rdev);
			return r;
		}
	}
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.clear_state_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	return 0;
}
4420
4421 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4422                                          bool enable)
4423 {
4424         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4425         u32 mask;
4426         int i;
4427
4428         if (enable)
4429                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4430         else
4431                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4432         WREG32(CP_INT_CNTL_RING0, tmp);
4433
4434         if (!enable) {
4435                 /* read a gfx register */
4436                 tmp = RREG32(DB_DEPTH_INFO);
4437
4438                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4439                 for (i = 0; i < rdev->usec_timeout; i++) {
4440                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4441                                 break;
4442                         udelay(1);
4443                 }
4444         }
4445 }
4446
4447 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4448 {
4449         int i;
4450
4451         for (i = 0; i < rdev->usec_timeout; i++) {
4452                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4453                         break;
4454                 udelay(1);
4455         }
4456
4457         for (i = 0; i < rdev->usec_timeout; i++) {
4458                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4459                         break;
4460                 udelay(1);
4461         }
4462 }
4463
/**
 * si_rlc_stop - halt the RLC (run list controller)
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC, mask the gui idle interrupts, then wait for the
 * serdes masters to go idle before returning.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
4472
/**
 * si_rlc_start - start the RLC (run list controller)
 *
 * @rdev: radeon_device pointer
 *
 * Enable the RLC and the gui idle interrupts, then give the
 * hardware a short settling delay.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
4481
4482 static bool si_lbpw_supported(struct radeon_device *rdev)
4483 {
4484         u32 tmp;
4485
4486         /* Enable LBPW only for DDR3 */
4487         tmp = RREG32(MC_SEQ_MISC0);
4488         if ((tmp & 0xF0000000) == 0xB0000000)
4489                 return true;
4490         return false;
4491 }
4492
4493 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
4494 {
4495         u32 tmp;
4496
4497         tmp = RREG32(RLC_LB_CNTL);
4498         if (enable)
4499                 tmp |= LOAD_BALANCE_ENABLE;
4500         else
4501                 tmp &= ~LOAD_BALANCE_ENABLE;
4502         WREG32(RLC_LB_CNTL, tmp);
4503
4504         if (!enable) {
4505                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4506                 WREG32(SPI_LB_CU_MASK, 0x00ff);
4507         }
4508 }
4509
/**
 * si_rlc_resume - stop, reprogram and restart the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC, reset its run-list/load-balance registers, program
 * the save/restore and clear-state buffer addresses, upload the RLC
 * microcode word by word, then restart the RLC with LBPW set
 * according to the memory type.  The register write order follows
 * the hardware programming sequence and must be preserved.
 *
 * Returns 0 for success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	/* buffer addresses are programmed in 256-byte units (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the microcode; firmware words are big-endian */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
4546
4547 static void si_enable_interrupts(struct radeon_device *rdev)
4548 {
4549         u32 ih_cntl = RREG32(IH_CNTL);
4550         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4551
4552         ih_cntl |= ENABLE_INTR;
4553         ih_rb_cntl |= IH_RB_ENABLE;
4554         WREG32(IH_CNTL, ih_cntl);
4555         WREG32(IH_RB_CNTL, ih_rb_cntl);
4556         rdev->ih.enabled = true;
4557 }
4558
4559 static void si_disable_interrupts(struct radeon_device *rdev)
4560 {
4561         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4562         u32 ih_cntl = RREG32(IH_CNTL);
4563
4564         ih_rb_cntl &= ~IH_RB_ENABLE;
4565         ih_cntl &= ~ENABLE_INTR;
4566         WREG32(IH_RB_CNTL, ih_rb_cntl);
4567         WREG32(IH_CNTL, ih_cntl);
4568         /* set rptr, wptr to 0 */
4569         WREG32(IH_RB_RPTR, 0);
4570         WREG32(IH_RB_WPTR, 0);
4571         rdev->ih.enabled = false;
4572         rdev->ih.rptr = 0;
4573 }
4574
/**
 * si_disable_interrupt_state - force all interrupt sources off
 *
 * @rdev: radeon_device pointer
 *
 * Clear every interrupt enable the hardware exposes: CP rings, both
 * DMA engines, GRBM, per-crtc vblank and pageflip masks, and (on
 * parts with a display controller) DAC autodetect and the six HPD
 * pads.  HPD polarity bits are preserved while the enables clear.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* CP: keep only the context busy/empty enables on ring 0 */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* clear the trap enable on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline masks, only for crtcs the asic actually has */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* display-less parts have no DAC or HPD registers */
	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep the polarity bit, drop the interrupt enable */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
4630
/**
 * si_irq_init - initialize interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Allocate the IH ring, bring up the RLC, program the interrupt
 * controller (dummy read address, ring base/size, writeback
 * address, rptr rearm), force all sources disabled, then enable
 * interrupt delivery.  The bring-up order matters.
 *
 * Returns 0 for success, negative error code on failure.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is programmed as log2 of the dword count */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4701
/**
 * si_irq_set - program the chip interrupt enables
 *
 * @rdev: radeon_device pointer
 *
 * Build enable masks from the software interrupt state in rdev->irq
 * (CP rings, DMA engines, per-crtc vblank/pageflip, per-connector
 * hotplug) and write them all to the hardware in one pass.
 *
 * Returns 0 for success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current HPD state with the enable bits cleared,
	 * so polarity and other bits are preserved */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank: enabled if either the drm vblank interrupt or a
	 * pending pageflip wants it */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* write all masks to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* NOTE(review): grph1..6 stay zero here, so the pageflip
	 * controls are always written as 0 — presumably armed
	 * elsewhere; confirm against the pageflip path */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	return 0;
}
4859
4860 static inline void si_irq_ack(struct radeon_device *rdev)
4861 {
4862         u32 tmp;
4863
4864         if (ASIC_IS_NODCE(rdev))
4865                 return;
4866
4867         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4868         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4869         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4870         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4871         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4872         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4873         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4874         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4875         if (rdev->num_crtc >= 4) {
4876                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4877                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4878         }
4879         if (rdev->num_crtc >= 6) {
4880                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4881                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4882         }
4883
4884         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4885                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4886         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4887                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4888         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4889                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4890         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4891                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4892         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4893                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4894         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4895                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4896
4897         if (rdev->num_crtc >= 4) {
4898                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4899                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4900                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4901                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4902                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4903                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4904                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4905                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4906                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4907                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4908                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4909                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4910         }
4911
4912         if (rdev->num_crtc >= 6) {
4913                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4914                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4915                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4916                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4917                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4918                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4919                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4920                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4921                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4922                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4923                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4924                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4925         }
4926
4927         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4928                 tmp = RREG32(DC_HPD1_INT_CONTROL);
4929                 tmp |= DC_HPDx_INT_ACK;
4930                 WREG32(DC_HPD1_INT_CONTROL, tmp);
4931         }
4932         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4933                 tmp = RREG32(DC_HPD2_INT_CONTROL);
4934                 tmp |= DC_HPDx_INT_ACK;
4935                 WREG32(DC_HPD2_INT_CONTROL, tmp);
4936         }
4937         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4938                 tmp = RREG32(DC_HPD3_INT_CONTROL);
4939                 tmp |= DC_HPDx_INT_ACK;
4940                 WREG32(DC_HPD3_INT_CONTROL, tmp);
4941         }
4942         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4943                 tmp = RREG32(DC_HPD4_INT_CONTROL);
4944                 tmp |= DC_HPDx_INT_ACK;
4945                 WREG32(DC_HPD4_INT_CONTROL, tmp);
4946         }
4947         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4948                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4949                 tmp |= DC_HPDx_INT_ACK;
4950                 WREG32(DC_HPD5_INT_CONTROL, tmp);
4951         }
4952         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4953                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4954                 tmp |= DC_HPDx_INT_ACK;
4955                 WREG32(DC_HPD6_INT_CONTROL, tmp);
4956         }
4957 }
4958
/* Disable interrupt delivery: mask sources, wait for any in-flight
 * interrupt to latch, ack it, then reset the interrupt state registers
 * to their quiescent defaults.  Order matters here.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
4967
/* Quiesce interrupts and stop the RLC in preparation for suspend. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
4973
/* Full interrupt teardown: suspend irq handling, then free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
4979
4980 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4981 {
4982         u32 wptr, tmp;
4983
4984         if (rdev->wb.enabled)
4985                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4986         else
4987                 wptr = RREG32(IH_RB_WPTR);
4988
4989         if (wptr & RB_OVERFLOW) {
4990                 /* When a ring buffer overflow happen start parsing interrupt
4991                  * from the last not overwritten vector (wptr + 16). Hopefully
4992                  * this should allow us to catchup.
4993                  */
4994                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4995                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4996                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4997                 tmp = RREG32(IH_RB_CNTL);
4998                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4999                 WREG32(IH_RB_CNTL, tmp);
5000         }
5001         return (wptr & rdev->ih.ptr_mask);
5002 }
5003
5004 /*        SI IV Ring
5005  * Each IV ring entry is 128 bits:
5006  * [7:0]    - interrupt source id
5007  * [31:8]   - reserved
5008  * [59:32]  - interrupt source data
5009  * [63:60]  - reserved
5010  * [71:64]  - RINGID
5011  * [79:72]  - VMID
5012  * [127:80] - reserved
5013  */
/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walk the IH ring from rptr to wptr and dispatch each 128-bit vector
 * (format documented above): display vblank/vline, hotplug, VM
 * protection faults, and CP/DMA fence completions.  Returns
 * IRQ_HANDLED if any vectors were processed, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM protection faults */
		case 147:
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* ring_id selects which of the three CP rings completed */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
5320
5321 /**
5322  * si_copy_dma - copy pages using the DMA engine
5323  *
5324  * @rdev: radeon_device pointer
5325  * @src_offset: src GPU address
5326  * @dst_offset: dst GPU address
5327  * @num_gpu_pages: number of GPU pages to xfer
5328  * @fence: radeon fence object
5329  *
5330  * Copy GPU paging using the DMA engine (SI).
5331  * Used by the radeon ttm implementation to move pages if
5332  * registered as the asic copy callback.
5333  */
5334 int si_copy_dma(struct radeon_device *rdev,
5335                 uint64_t src_offset, uint64_t dst_offset,
5336                 unsigned num_gpu_pages,
5337                 struct radeon_fence **fence)
5338 {
5339         struct radeon_semaphore *sem = NULL;
5340         int ring_index = rdev->asic->copy.dma_ring_index;
5341         struct radeon_ring *ring = &rdev->ring[ring_index];
5342         u32 size_in_bytes, cur_size_in_bytes;
5343         int i, num_loops;
5344         int r = 0;
5345
5346         r = radeon_semaphore_create(rdev, &sem);
5347         if (r) {
5348                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5349                 return r;
5350         }
5351
5352         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5353         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5354         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5355         if (r) {
5356                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5357                 radeon_semaphore_free(rdev, &sem, NULL);
5358                 return r;
5359         }
5360
5361         if (radeon_fence_need_sync(*fence, ring->idx)) {
5362                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5363                                             ring->idx);
5364                 radeon_fence_note_sync(*fence, ring->idx);
5365         } else {
5366                 radeon_semaphore_free(rdev, &sem, NULL);
5367         }
5368
5369         for (i = 0; i < num_loops; i++) {
5370                 cur_size_in_bytes = size_in_bytes;
5371                 if (cur_size_in_bytes > 0xFFFFF)
5372                         cur_size_in_bytes = 0xFFFFF;
5373                 size_in_bytes -= cur_size_in_bytes;
5374                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5375                 radeon_ring_write(ring, dst_offset & 0xffffffff);
5376                 radeon_ring_write(ring, src_offset & 0xffffffff);
5377                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5378                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5379                 src_offset += cur_size_in_bytes;
5380                 dst_offset += cur_size_in_bytes;
5381         }
5382
5383         r = radeon_fence_emit(rdev, fence, ring->idx);
5384         if (r) {
5385                 radeon_ring_unlock_undo(rdev, ring);
5386                 return r;
5387         }
5388
5389         radeon_ring_unlock_commit(rdev, ring);
5390         radeon_semaphore_free(rdev, &sem, *fence);
5391
5392         return r;
5393 }
5394
5395 /*
5396  * startup/shutdown callbacks
5397  */
/**
 * si_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Brings the hardware up: loads the microcode, programs the memory
 * controller and GART, allocates RLC and writeback buffers, starts
 * fence processing, IRQs and all CP/DMA/UVD rings, then initializes
 * the IB pool and the VM manager.  Called from si_init() and
 * si_resume().
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* fetch the CP/RLC/MC firmware images if not already loaded */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence processing on every ring we will bring up below */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failures are not fatal: the UVD ring is simply disabled
	 * (ring_size = 0) and the rest of startup proceeds.
	 */
	if (rdev->has_uvd) {
		r = rv770_uvd_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the three CP rings (gfx + two compute) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	/* initialize the two async DMA rings */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* only bring up the UVD ring if the resume above succeeded */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size,
					     R600_WB_UVD_RPTR_OFFSET,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     0, 0xfffff, RADEON_CP_PACKET2);
			if (!r)
				r = r600_uvd_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
5578
5579 int si_resume(struct radeon_device *rdev)
5580 {
5581         int r;
5582
5583         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
5584          * posting will perform necessary task to bring back GPU into good
5585          * shape.
5586          */
5587         /* post card */
5588         atom_asic_init(rdev->mode_info.atom_context);
5589
5590         /* init golden registers */
5591         si_init_golden_registers(rdev);
5592
5593         rdev->accel_working = true;
5594         r = si_startup(rdev);
5595         if (r) {
5596                 DRM_ERROR("si startup failed on resume\n");
5597                 rdev->accel_working = false;
5598                 return r;
5599         }
5600
5601         return r;
5602
5603 }
5604
/**
 * si_suspend - quiesce the hardware for suspend or unload
 *
 * @rdev: radeon_device pointer
 *
 * Undoes the engine bring-up done in si_startup(): tears down the VM
 * manager, halts the CP and the async DMA engines, stops UVD if
 * present, suspends interrupt handling and disables writeback and the
 * GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	/* halt the command processor */
	si_cp_enable(rdev, false);
	/* stop the async DMA engines */
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		/* stop the UVD ring before suspending the block */
		r600_uvd_rbc_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5619
/* The plan is to move initialization into this function and to use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic specific functions.  This should
 * also allow us to remove a bunch of callback functions like
 * vram_info.
 */
/**
 * si_init - asic specific driver and hardware init (SI)
 *
 * @rdev: radeon_device pointer
 *
 * One-time driver initialization: reads and posts the BIOS, sets up
 * clocks, fences, the memory controller and the software state of all
 * rings, then calls si_startup() to program the hardware.
 * Returns 0 on success, negative error code on failure.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* set up the software state of each ring; the hardware side is
	 * programmed later in si_startup()
	 */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* a UVD init failure is not fatal; the UVD ring is just skipped */
	if (rdev->has_uvd) {
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* acceleration is optional; keep the driver usable for
		 * modesetting even if the engines failed to come up
		 */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		si_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
5740
/**
 * si_fini - asic specific driver and hardware teardown (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Reverses si_init()/si_startup(): tears down the engines, rings,
 * buffers, interrupt handling, GART, fence driver, bo and atombios
 * state, and releases the BIOS copy.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd)
		radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* free the BIOS copy taken at init time */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
5762
5763 /**
5764  * si_get_gpu_clock_counter - return GPU clock counter snapshot
5765  *
5766  * @rdev: radeon_device pointer
5767  *
5768  * Fetches a GPU clock counter snapshot (SI).
5769  * Returns the 64 bit clock counter snapshot.
5770  */
5771 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5772 {
5773         uint64_t clock;
5774
5775         mutex_lock(&rdev->gpu_clock_mutex);
5776         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5777         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5778                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5779         mutex_unlock(&rdev->gpu_clock_mutex);
5780         return clock;
5781 }
5782
/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK; 0 (together with @dclk == 0) puts the PLL to sleep
 * @dclk: requested DCLK; 0 (together with @vclk == 0) puts the PLL to sleep
 *
 * Bypasses VCLK/DCLK with BCLK, reprograms the UPLL dividers for the
 * requested frequencies and switches the clocks back to the PLL
 * outputs once it has locked.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute the feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
5873
/**
 * si_pcie_gen3_enable - bring the PCIE link up to its highest supported speed
 *
 * @rdev: radeon_device pointer
 *
 * Switches the PCIE link to gen2 or gen3 when both the GPU and the
 * platform support it, including the gen3 equalization retraining
 * sequence.  A no-op on IGPs, non-PCIE parts, or when disabled via
 * the radeon.pcie_gen2 module parameter.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* radeon.pcie_gen2=0 disables all link speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no PCIE link to train */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* query the speeds supported by the platform */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		/* data rate 2 == gen3 already active */
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		/* data rate 1 == gen2 already active */
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends of the link need a PCIE capability */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current HAWD settings, then force HAWD on
			 * both the bridge and the GPU for the retraining
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the widest detected link width */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved bit 4 and bits 9..11 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field of Link Control 2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
6030
6031 static void si_program_aspm(struct radeon_device *rdev)
6032 {
6033         u32 data, orig;
6034         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
6035         bool disable_clkreq = false;
6036
6037         if (!(rdev->flags & RADEON_IS_PCIE))
6038                 return;
6039
6040         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
6041         data &= ~LC_XMIT_N_FTS_MASK;
6042         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
6043         if (orig != data)
6044                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
6045
6046         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
6047         data |= LC_GO_TO_RECOVERY;
6048         if (orig != data)
6049                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
6050
6051         orig = data = RREG32_PCIE(PCIE_P_CNTL);
6052         data |= P_IGNORE_EDB_ERR;
6053         if (orig != data)
6054                 WREG32_PCIE(PCIE_P_CNTL, data);
6055
6056         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
6057         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
6058         data |= LC_PMI_TO_L1_DIS;
6059         if (!disable_l0s)
6060                 data |= LC_L0S_INACTIVITY(7);
6061
6062         if (!disable_l1) {
6063                 data |= LC_L1_INACTIVITY(7);
6064                 data &= ~LC_PMI_TO_L1_DIS;
6065                 if (orig != data)
6066                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
6067
6068                 if (!disable_plloff_in_l1) {
6069                         bool clk_req_support;
6070
6071                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
6072                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
6073                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
6074                         if (orig != data)
6075                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
6076
6077                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
6078                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
6079                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
6080                         if (orig != data)
6081                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
6082
6083                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
6084                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
6085                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
6086                         if (orig != data)
6087                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
6088
6089                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
6090                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
6091                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
6092                         if (orig != data)
6093                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
6094
6095                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
6096                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
6097                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
6098                                 if (orig != data)
6099                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
6100
6101                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
6102                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
6103                                 if (orig != data)
6104                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
6105
6106                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
6107                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
6108                                 if (orig != data)
6109                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
6110
6111                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
6112                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
6113                                 if (orig != data)
6114                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
6115
6116                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
6117                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
6118                                 if (orig != data)
6119                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
6120
6121                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
6122                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
6123                                 if (orig != data)
6124                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
6125
6126                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
6127                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
6128                                 if (orig != data)
6129                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
6130
6131                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
6132                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
6133                                 if (orig != data)
6134                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
6135                         }
6136                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
6137                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
6138                         data |= LC_DYN_LANES_PWR_STATE(3);
6139                         if (orig != data)
6140                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
6141
6142                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
6143                         data &= ~LS2_EXIT_TIME_MASK;
6144                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
6145                                 data |= LS2_EXIT_TIME(5);
6146                         if (orig != data)
6147                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
6148
6149                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
6150                         data &= ~LS2_EXIT_TIME_MASK;
6151                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
6152                                 data |= LS2_EXIT_TIME(5);
6153                         if (orig != data)
6154                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
6155
6156                         if (!disable_clkreq) {
6157                                 struct pci_dev *root = rdev->pdev->bus->self;
6158                                 u32 lnkcap;
6159
6160                                 clk_req_support = false;
6161                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
6162                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
6163                                         clk_req_support = true;
6164                         } else {
6165                                 clk_req_support = false;
6166                         }
6167
6168                         if (clk_req_support) {
6169                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
6170                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
6171                                 if (orig != data)
6172                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
6173
6174                                 orig = data = RREG32(THM_CLK_CNTL);
6175                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
6176                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
6177                                 if (orig != data)
6178                                         WREG32(THM_CLK_CNTL, data);
6179
6180                                 orig = data = RREG32(MISC_CLK_CNTL);
6181                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
6182                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
6183                                 if (orig != data)
6184                                         WREG32(MISC_CLK_CNTL, data);
6185
6186                                 orig = data = RREG32(CG_CLKPIN_CNTL);
6187                                 data &= ~BCLK_AS_XCLK;
6188                                 if (orig != data)
6189                                         WREG32(CG_CLKPIN_CNTL, data);
6190
6191                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
6192                                 data &= ~FORCE_BIF_REFCLK_EN;
6193                                 if (orig != data)
6194                                         WREG32(CG_CLKPIN_CNTL_2, data);
6195
6196                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
6197                                 data &= ~MPLL_CLKOUT_SEL_MASK;
6198                                 data |= MPLL_CLKOUT_SEL(4);
6199                                 if (orig != data)
6200                                         WREG32(MPLL_BYPASSCLK_SEL, data);
6201
6202                                 orig = data = RREG32(SPLL_CNTL_MODE);
6203                                 data &= ~SPLL_REFCLK_SEL_MASK;
6204                                 if (orig != data)
6205                                         WREG32(SPLL_CNTL_MODE, data);
6206                         }
6207                 }
6208         } else {
6209                 if (orig != data)
6210                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
6211         }
6212
6213         orig = data = RREG32_PCIE(PCIE_CNTL2);
6214         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
6215         if (orig != data)
6216                 WREG32_PCIE(PCIE_CNTL2, data);
6217
6218         if (!disable_l0s) {
6219                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
6220                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
6221                         data = RREG32_PCIE(PCIE_LC_STATUS1);
6222                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
6223                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
6224                                 data &= ~LC_L0S_INACTIVITY_MASK;
6225                                 if (orig != data)
6226                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
6227                         }
6228                 }
6229         }
6230 }