/*
 * drm/radeon: track which asics have UVD
 * [linux-2.6-block.git] / drivers / gpu / drm / radeon / si.c
 */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 #define OLAND_MC_UCODE_SIZE 7863
42
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
53 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
54 MODULE_FIRMWARE("radeon/VERDE_me.bin");
55 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
68
69 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
70 extern void r600_ih_ring_fini(struct radeon_device *rdev);
71 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
75 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
76 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
77
78 static const u32 tahiti_golden_rlc_registers[] =
79 {
80         0xc424, 0xffffffff, 0x00601005,
81         0xc47c, 0xffffffff, 0x10104040,
82         0xc488, 0xffffffff, 0x0100000a,
83         0xc314, 0xffffffff, 0x00000800,
84         0xc30c, 0xffffffff, 0x800000f4,
85         0xf4a8, 0xffffffff, 0x00000000
86 };
87
88 static const u32 tahiti_golden_registers[] =
89 {
90         0x9a10, 0x00010000, 0x00018208,
91         0x9830, 0xffffffff, 0x00000000,
92         0x9834, 0xf00fffff, 0x00000400,
93         0x9838, 0x0002021c, 0x00020200,
94         0xc78, 0x00000080, 0x00000000,
95         0xd030, 0x000300c0, 0x00800040,
96         0xd830, 0x000300c0, 0x00800040,
97         0x5bb0, 0x000000f0, 0x00000070,
98         0x5bc0, 0x00200000, 0x50100000,
99         0x7030, 0x31000311, 0x00000011,
100         0x277c, 0x00000003, 0x000007ff,
101         0x240c, 0x000007ff, 0x00000000,
102         0x8a14, 0xf000001f, 0x00000007,
103         0x8b24, 0xffffffff, 0x00ffffff,
104         0x8b10, 0x0000ff0f, 0x00000000,
105         0x28a4c, 0x07ffffff, 0x4e000000,
106         0x28350, 0x3f3f3fff, 0x2a00126a,
107         0x30, 0x000000ff, 0x0040,
108         0x34, 0x00000040, 0x00004040,
109         0x9100, 0x07ffffff, 0x03000000,
110         0x8e88, 0x01ff1f3f, 0x00000000,
111         0x8e84, 0x01ff1f3f, 0x00000000,
112         0x9060, 0x0000007f, 0x00000020,
113         0x9508, 0x00010000, 0x00010000,
114         0xac14, 0x00000200, 0x000002fb,
115         0xac10, 0xffffffff, 0x0000543b,
116         0xac0c, 0xffffffff, 0xa9210876,
117         0x88d0, 0xffffffff, 0x000fff40,
118         0x88d4, 0x0000001f, 0x00000010,
119         0x1410, 0x20000000, 0x20fffed8,
120         0x15c0, 0x000c0fc0, 0x000c0400
121 };
122
123 static const u32 tahiti_golden_registers2[] =
124 {
125         0xc64, 0x00000001, 0x00000001
126 };
127
128 static const u32 pitcairn_golden_rlc_registers[] =
129 {
130         0xc424, 0xffffffff, 0x00601004,
131         0xc47c, 0xffffffff, 0x10102020,
132         0xc488, 0xffffffff, 0x01000020,
133         0xc314, 0xffffffff, 0x00000800,
134         0xc30c, 0xffffffff, 0x800000a4
135 };
136
137 static const u32 pitcairn_golden_registers[] =
138 {
139         0x9a10, 0x00010000, 0x00018208,
140         0x9830, 0xffffffff, 0x00000000,
141         0x9834, 0xf00fffff, 0x00000400,
142         0x9838, 0x0002021c, 0x00020200,
143         0xc78, 0x00000080, 0x00000000,
144         0xd030, 0x000300c0, 0x00800040,
145         0xd830, 0x000300c0, 0x00800040,
146         0x5bb0, 0x000000f0, 0x00000070,
147         0x5bc0, 0x00200000, 0x50100000,
148         0x7030, 0x31000311, 0x00000011,
149         0x2ae4, 0x00073ffe, 0x000022a2,
150         0x240c, 0x000007ff, 0x00000000,
151         0x8a14, 0xf000001f, 0x00000007,
152         0x8b24, 0xffffffff, 0x00ffffff,
153         0x8b10, 0x0000ff0f, 0x00000000,
154         0x28a4c, 0x07ffffff, 0x4e000000,
155         0x28350, 0x3f3f3fff, 0x2a00126a,
156         0x30, 0x000000ff, 0x0040,
157         0x34, 0x00000040, 0x00004040,
158         0x9100, 0x07ffffff, 0x03000000,
159         0x9060, 0x0000007f, 0x00000020,
160         0x9508, 0x00010000, 0x00010000,
161         0xac14, 0x000003ff, 0x000000f7,
162         0xac10, 0xffffffff, 0x00000000,
163         0xac0c, 0xffffffff, 0x32761054,
164         0x88d4, 0x0000001f, 0x00000010,
165         0x15c0, 0x000c0fc0, 0x000c0400
166 };
167
168 static const u32 verde_golden_rlc_registers[] =
169 {
170         0xc424, 0xffffffff, 0x033f1005,
171         0xc47c, 0xffffffff, 0x10808020,
172         0xc488, 0xffffffff, 0x00800008,
173         0xc314, 0xffffffff, 0x00001000,
174         0xc30c, 0xffffffff, 0x80010014
175 };
176
177 static const u32 verde_golden_registers[] =
178 {
179         0x9a10, 0x00010000, 0x00018208,
180         0x9830, 0xffffffff, 0x00000000,
181         0x9834, 0xf00fffff, 0x00000400,
182         0x9838, 0x0002021c, 0x00020200,
183         0xc78, 0x00000080, 0x00000000,
184         0xd030, 0x000300c0, 0x00800040,
185         0xd030, 0x000300c0, 0x00800040,
186         0xd830, 0x000300c0, 0x00800040,
187         0xd830, 0x000300c0, 0x00800040,
188         0x5bb0, 0x000000f0, 0x00000070,
189         0x5bc0, 0x00200000, 0x50100000,
190         0x7030, 0x31000311, 0x00000011,
191         0x2ae4, 0x00073ffe, 0x000022a2,
192         0x2ae4, 0x00073ffe, 0x000022a2,
193         0x2ae4, 0x00073ffe, 0x000022a2,
194         0x240c, 0x000007ff, 0x00000000,
195         0x240c, 0x000007ff, 0x00000000,
196         0x240c, 0x000007ff, 0x00000000,
197         0x8a14, 0xf000001f, 0x00000007,
198         0x8a14, 0xf000001f, 0x00000007,
199         0x8a14, 0xf000001f, 0x00000007,
200         0x8b24, 0xffffffff, 0x00ffffff,
201         0x8b10, 0x0000ff0f, 0x00000000,
202         0x28a4c, 0x07ffffff, 0x4e000000,
203         0x28350, 0x3f3f3fff, 0x0000124a,
204         0x28350, 0x3f3f3fff, 0x0000124a,
205         0x28350, 0x3f3f3fff, 0x0000124a,
206         0x30, 0x000000ff, 0x0040,
207         0x34, 0x00000040, 0x00004040,
208         0x9100, 0x07ffffff, 0x03000000,
209         0x9100, 0x07ffffff, 0x03000000,
210         0x8e88, 0x01ff1f3f, 0x00000000,
211         0x8e88, 0x01ff1f3f, 0x00000000,
212         0x8e88, 0x01ff1f3f, 0x00000000,
213         0x8e84, 0x01ff1f3f, 0x00000000,
214         0x8e84, 0x01ff1f3f, 0x00000000,
215         0x8e84, 0x01ff1f3f, 0x00000000,
216         0x9060, 0x0000007f, 0x00000020,
217         0x9508, 0x00010000, 0x00010000,
218         0xac14, 0x000003ff, 0x00000003,
219         0xac14, 0x000003ff, 0x00000003,
220         0xac14, 0x000003ff, 0x00000003,
221         0xac10, 0xffffffff, 0x00000000,
222         0xac10, 0xffffffff, 0x00000000,
223         0xac10, 0xffffffff, 0x00000000,
224         0xac0c, 0xffffffff, 0x00001032,
225         0xac0c, 0xffffffff, 0x00001032,
226         0xac0c, 0xffffffff, 0x00001032,
227         0x88d4, 0x0000001f, 0x00000010,
228         0x88d4, 0x0000001f, 0x00000010,
229         0x88d4, 0x0000001f, 0x00000010,
230         0x15c0, 0x000c0fc0, 0x000c0400
231 };
232
233 static const u32 oland_golden_rlc_registers[] =
234 {
235         0xc424, 0xffffffff, 0x00601005,
236         0xc47c, 0xffffffff, 0x10104040,
237         0xc488, 0xffffffff, 0x0100000a,
238         0xc314, 0xffffffff, 0x00000800,
239         0xc30c, 0xffffffff, 0x800000f4
240 };
241
242 static const u32 oland_golden_registers[] =
243 {
244         0x9a10, 0x00010000, 0x00018208,
245         0x9830, 0xffffffff, 0x00000000,
246         0x9834, 0xf00fffff, 0x00000400,
247         0x9838, 0x0002021c, 0x00020200,
248         0xc78, 0x00000080, 0x00000000,
249         0xd030, 0x000300c0, 0x00800040,
250         0xd830, 0x000300c0, 0x00800040,
251         0x5bb0, 0x000000f0, 0x00000070,
252         0x5bc0, 0x00200000, 0x50100000,
253         0x7030, 0x31000311, 0x00000011,
254         0x2ae4, 0x00073ffe, 0x000022a2,
255         0x240c, 0x000007ff, 0x00000000,
256         0x8a14, 0xf000001f, 0x00000007,
257         0x8b24, 0xffffffff, 0x00ffffff,
258         0x8b10, 0x0000ff0f, 0x00000000,
259         0x28a4c, 0x07ffffff, 0x4e000000,
260         0x28350, 0x3f3f3fff, 0x00000082,
261         0x30, 0x000000ff, 0x0040,
262         0x34, 0x00000040, 0x00004040,
263         0x9100, 0x07ffffff, 0x03000000,
264         0x9060, 0x0000007f, 0x00000020,
265         0x9508, 0x00010000, 0x00010000,
266         0xac14, 0x000003ff, 0x000000f3,
267         0xac10, 0xffffffff, 0x00000000,
268         0xac0c, 0xffffffff, 0x00003210,
269         0x88d4, 0x0000001f, 0x00000010,
270         0x15c0, 0x000c0fc0, 0x000c0400
271 };
272
273 static const u32 tahiti_mgcg_cgcg_init[] =
274 {
275         0xc400, 0xffffffff, 0xfffffffc,
276         0x802c, 0xffffffff, 0xe0000000,
277         0x9a60, 0xffffffff, 0x00000100,
278         0x92a4, 0xffffffff, 0x00000100,
279         0xc164, 0xffffffff, 0x00000100,
280         0x9774, 0xffffffff, 0x00000100,
281         0x8984, 0xffffffff, 0x06000100,
282         0x8a18, 0xffffffff, 0x00000100,
283         0x92a0, 0xffffffff, 0x00000100,
284         0xc380, 0xffffffff, 0x00000100,
285         0x8b28, 0xffffffff, 0x00000100,
286         0x9144, 0xffffffff, 0x00000100,
287         0x8d88, 0xffffffff, 0x00000100,
288         0x8d8c, 0xffffffff, 0x00000100,
289         0x9030, 0xffffffff, 0x00000100,
290         0x9034, 0xffffffff, 0x00000100,
291         0x9038, 0xffffffff, 0x00000100,
292         0x903c, 0xffffffff, 0x00000100,
293         0xad80, 0xffffffff, 0x00000100,
294         0xac54, 0xffffffff, 0x00000100,
295         0x897c, 0xffffffff, 0x06000100,
296         0x9868, 0xffffffff, 0x00000100,
297         0x9510, 0xffffffff, 0x00000100,
298         0xaf04, 0xffffffff, 0x00000100,
299         0xae04, 0xffffffff, 0x00000100,
300         0x949c, 0xffffffff, 0x00000100,
301         0x802c, 0xffffffff, 0xe0000000,
302         0x9160, 0xffffffff, 0x00010000,
303         0x9164, 0xffffffff, 0x00030002,
304         0x9168, 0xffffffff, 0x00040007,
305         0x916c, 0xffffffff, 0x00060005,
306         0x9170, 0xffffffff, 0x00090008,
307         0x9174, 0xffffffff, 0x00020001,
308         0x9178, 0xffffffff, 0x00040003,
309         0x917c, 0xffffffff, 0x00000007,
310         0x9180, 0xffffffff, 0x00060005,
311         0x9184, 0xffffffff, 0x00090008,
312         0x9188, 0xffffffff, 0x00030002,
313         0x918c, 0xffffffff, 0x00050004,
314         0x9190, 0xffffffff, 0x00000008,
315         0x9194, 0xffffffff, 0x00070006,
316         0x9198, 0xffffffff, 0x000a0009,
317         0x919c, 0xffffffff, 0x00040003,
318         0x91a0, 0xffffffff, 0x00060005,
319         0x91a4, 0xffffffff, 0x00000009,
320         0x91a8, 0xffffffff, 0x00080007,
321         0x91ac, 0xffffffff, 0x000b000a,
322         0x91b0, 0xffffffff, 0x00050004,
323         0x91b4, 0xffffffff, 0x00070006,
324         0x91b8, 0xffffffff, 0x0008000b,
325         0x91bc, 0xffffffff, 0x000a0009,
326         0x91c0, 0xffffffff, 0x000d000c,
327         0x91c4, 0xffffffff, 0x00060005,
328         0x91c8, 0xffffffff, 0x00080007,
329         0x91cc, 0xffffffff, 0x0000000b,
330         0x91d0, 0xffffffff, 0x000a0009,
331         0x91d4, 0xffffffff, 0x000d000c,
332         0x91d8, 0xffffffff, 0x00070006,
333         0x91dc, 0xffffffff, 0x00090008,
334         0x91e0, 0xffffffff, 0x0000000c,
335         0x91e4, 0xffffffff, 0x000b000a,
336         0x91e8, 0xffffffff, 0x000e000d,
337         0x91ec, 0xffffffff, 0x00080007,
338         0x91f0, 0xffffffff, 0x000a0009,
339         0x91f4, 0xffffffff, 0x0000000d,
340         0x91f8, 0xffffffff, 0x000c000b,
341         0x91fc, 0xffffffff, 0x000f000e,
342         0x9200, 0xffffffff, 0x00090008,
343         0x9204, 0xffffffff, 0x000b000a,
344         0x9208, 0xffffffff, 0x000c000f,
345         0x920c, 0xffffffff, 0x000e000d,
346         0x9210, 0xffffffff, 0x00110010,
347         0x9214, 0xffffffff, 0x000a0009,
348         0x9218, 0xffffffff, 0x000c000b,
349         0x921c, 0xffffffff, 0x0000000f,
350         0x9220, 0xffffffff, 0x000e000d,
351         0x9224, 0xffffffff, 0x00110010,
352         0x9228, 0xffffffff, 0x000b000a,
353         0x922c, 0xffffffff, 0x000d000c,
354         0x9230, 0xffffffff, 0x00000010,
355         0x9234, 0xffffffff, 0x000f000e,
356         0x9238, 0xffffffff, 0x00120011,
357         0x923c, 0xffffffff, 0x000c000b,
358         0x9240, 0xffffffff, 0x000e000d,
359         0x9244, 0xffffffff, 0x00000011,
360         0x9248, 0xffffffff, 0x0010000f,
361         0x924c, 0xffffffff, 0x00130012,
362         0x9250, 0xffffffff, 0x000d000c,
363         0x9254, 0xffffffff, 0x000f000e,
364         0x9258, 0xffffffff, 0x00100013,
365         0x925c, 0xffffffff, 0x00120011,
366         0x9260, 0xffffffff, 0x00150014,
367         0x9264, 0xffffffff, 0x000e000d,
368         0x9268, 0xffffffff, 0x0010000f,
369         0x926c, 0xffffffff, 0x00000013,
370         0x9270, 0xffffffff, 0x00120011,
371         0x9274, 0xffffffff, 0x00150014,
372         0x9278, 0xffffffff, 0x000f000e,
373         0x927c, 0xffffffff, 0x00110010,
374         0x9280, 0xffffffff, 0x00000014,
375         0x9284, 0xffffffff, 0x00130012,
376         0x9288, 0xffffffff, 0x00160015,
377         0x928c, 0xffffffff, 0x0010000f,
378         0x9290, 0xffffffff, 0x00120011,
379         0x9294, 0xffffffff, 0x00000015,
380         0x9298, 0xffffffff, 0x00140013,
381         0x929c, 0xffffffff, 0x00170016,
382         0x9150, 0xffffffff, 0x96940200,
383         0x8708, 0xffffffff, 0x00900100,
384         0xc478, 0xffffffff, 0x00000080,
385         0xc404, 0xffffffff, 0x0020003f,
386         0x30, 0xffffffff, 0x0000001c,
387         0x34, 0x000f0000, 0x000f0000,
388         0x160c, 0xffffffff, 0x00000100,
389         0x1024, 0xffffffff, 0x00000100,
390         0x102c, 0x00000101, 0x00000000,
391         0x20a8, 0xffffffff, 0x00000104,
392         0x264c, 0x000c0000, 0x000c0000,
393         0x2648, 0x000c0000, 0x000c0000,
394         0x55e4, 0xff000fff, 0x00000100,
395         0x55e8, 0x00000001, 0x00000001,
396         0x2f50, 0x00000001, 0x00000001,
397         0x30cc, 0xc0000fff, 0x00000104,
398         0xc1e4, 0x00000001, 0x00000001,
399         0xd0c0, 0xfffffff0, 0x00000100,
400         0xd8c0, 0xfffffff0, 0x00000100
401 };
402
403 static const u32 pitcairn_mgcg_cgcg_init[] =
404 {
405         0xc400, 0xffffffff, 0xfffffffc,
406         0x802c, 0xffffffff, 0xe0000000,
407         0x9a60, 0xffffffff, 0x00000100,
408         0x92a4, 0xffffffff, 0x00000100,
409         0xc164, 0xffffffff, 0x00000100,
410         0x9774, 0xffffffff, 0x00000100,
411         0x8984, 0xffffffff, 0x06000100,
412         0x8a18, 0xffffffff, 0x00000100,
413         0x92a0, 0xffffffff, 0x00000100,
414         0xc380, 0xffffffff, 0x00000100,
415         0x8b28, 0xffffffff, 0x00000100,
416         0x9144, 0xffffffff, 0x00000100,
417         0x8d88, 0xffffffff, 0x00000100,
418         0x8d8c, 0xffffffff, 0x00000100,
419         0x9030, 0xffffffff, 0x00000100,
420         0x9034, 0xffffffff, 0x00000100,
421         0x9038, 0xffffffff, 0x00000100,
422         0x903c, 0xffffffff, 0x00000100,
423         0xad80, 0xffffffff, 0x00000100,
424         0xac54, 0xffffffff, 0x00000100,
425         0x897c, 0xffffffff, 0x06000100,
426         0x9868, 0xffffffff, 0x00000100,
427         0x9510, 0xffffffff, 0x00000100,
428         0xaf04, 0xffffffff, 0x00000100,
429         0xae04, 0xffffffff, 0x00000100,
430         0x949c, 0xffffffff, 0x00000100,
431         0x802c, 0xffffffff, 0xe0000000,
432         0x9160, 0xffffffff, 0x00010000,
433         0x9164, 0xffffffff, 0x00030002,
434         0x9168, 0xffffffff, 0x00040007,
435         0x916c, 0xffffffff, 0x00060005,
436         0x9170, 0xffffffff, 0x00090008,
437         0x9174, 0xffffffff, 0x00020001,
438         0x9178, 0xffffffff, 0x00040003,
439         0x917c, 0xffffffff, 0x00000007,
440         0x9180, 0xffffffff, 0x00060005,
441         0x9184, 0xffffffff, 0x00090008,
442         0x9188, 0xffffffff, 0x00030002,
443         0x918c, 0xffffffff, 0x00050004,
444         0x9190, 0xffffffff, 0x00000008,
445         0x9194, 0xffffffff, 0x00070006,
446         0x9198, 0xffffffff, 0x000a0009,
447         0x919c, 0xffffffff, 0x00040003,
448         0x91a0, 0xffffffff, 0x00060005,
449         0x91a4, 0xffffffff, 0x00000009,
450         0x91a8, 0xffffffff, 0x00080007,
451         0x91ac, 0xffffffff, 0x000b000a,
452         0x91b0, 0xffffffff, 0x00050004,
453         0x91b4, 0xffffffff, 0x00070006,
454         0x91b8, 0xffffffff, 0x0008000b,
455         0x91bc, 0xffffffff, 0x000a0009,
456         0x91c0, 0xffffffff, 0x000d000c,
457         0x9200, 0xffffffff, 0x00090008,
458         0x9204, 0xffffffff, 0x000b000a,
459         0x9208, 0xffffffff, 0x000c000f,
460         0x920c, 0xffffffff, 0x000e000d,
461         0x9210, 0xffffffff, 0x00110010,
462         0x9214, 0xffffffff, 0x000a0009,
463         0x9218, 0xffffffff, 0x000c000b,
464         0x921c, 0xffffffff, 0x0000000f,
465         0x9220, 0xffffffff, 0x000e000d,
466         0x9224, 0xffffffff, 0x00110010,
467         0x9228, 0xffffffff, 0x000b000a,
468         0x922c, 0xffffffff, 0x000d000c,
469         0x9230, 0xffffffff, 0x00000010,
470         0x9234, 0xffffffff, 0x000f000e,
471         0x9238, 0xffffffff, 0x00120011,
472         0x923c, 0xffffffff, 0x000c000b,
473         0x9240, 0xffffffff, 0x000e000d,
474         0x9244, 0xffffffff, 0x00000011,
475         0x9248, 0xffffffff, 0x0010000f,
476         0x924c, 0xffffffff, 0x00130012,
477         0x9250, 0xffffffff, 0x000d000c,
478         0x9254, 0xffffffff, 0x000f000e,
479         0x9258, 0xffffffff, 0x00100013,
480         0x925c, 0xffffffff, 0x00120011,
481         0x9260, 0xffffffff, 0x00150014,
482         0x9150, 0xffffffff, 0x96940200,
483         0x8708, 0xffffffff, 0x00900100,
484         0xc478, 0xffffffff, 0x00000080,
485         0xc404, 0xffffffff, 0x0020003f,
486         0x30, 0xffffffff, 0x0000001c,
487         0x34, 0x000f0000, 0x000f0000,
488         0x160c, 0xffffffff, 0x00000100,
489         0x1024, 0xffffffff, 0x00000100,
490         0x102c, 0x00000101, 0x00000000,
491         0x20a8, 0xffffffff, 0x00000104,
492         0x55e4, 0xff000fff, 0x00000100,
493         0x55e8, 0x00000001, 0x00000001,
494         0x2f50, 0x00000001, 0x00000001,
495         0x30cc, 0xc0000fff, 0x00000104,
496         0xc1e4, 0x00000001, 0x00000001,
497         0xd0c0, 0xfffffff0, 0x00000100,
498         0xd8c0, 0xfffffff0, 0x00000100
499 };
500
501 static const u32 verde_mgcg_cgcg_init[] =
502 {
503         0xc400, 0xffffffff, 0xfffffffc,
504         0x802c, 0xffffffff, 0xe0000000,
505         0x9a60, 0xffffffff, 0x00000100,
506         0x92a4, 0xffffffff, 0x00000100,
507         0xc164, 0xffffffff, 0x00000100,
508         0x9774, 0xffffffff, 0x00000100,
509         0x8984, 0xffffffff, 0x06000100,
510         0x8a18, 0xffffffff, 0x00000100,
511         0x92a0, 0xffffffff, 0x00000100,
512         0xc380, 0xffffffff, 0x00000100,
513         0x8b28, 0xffffffff, 0x00000100,
514         0x9144, 0xffffffff, 0x00000100,
515         0x8d88, 0xffffffff, 0x00000100,
516         0x8d8c, 0xffffffff, 0x00000100,
517         0x9030, 0xffffffff, 0x00000100,
518         0x9034, 0xffffffff, 0x00000100,
519         0x9038, 0xffffffff, 0x00000100,
520         0x903c, 0xffffffff, 0x00000100,
521         0xad80, 0xffffffff, 0x00000100,
522         0xac54, 0xffffffff, 0x00000100,
523         0x897c, 0xffffffff, 0x06000100,
524         0x9868, 0xffffffff, 0x00000100,
525         0x9510, 0xffffffff, 0x00000100,
526         0xaf04, 0xffffffff, 0x00000100,
527         0xae04, 0xffffffff, 0x00000100,
528         0x949c, 0xffffffff, 0x00000100,
529         0x802c, 0xffffffff, 0xe0000000,
530         0x9160, 0xffffffff, 0x00010000,
531         0x9164, 0xffffffff, 0x00030002,
532         0x9168, 0xffffffff, 0x00040007,
533         0x916c, 0xffffffff, 0x00060005,
534         0x9170, 0xffffffff, 0x00090008,
535         0x9174, 0xffffffff, 0x00020001,
536         0x9178, 0xffffffff, 0x00040003,
537         0x917c, 0xffffffff, 0x00000007,
538         0x9180, 0xffffffff, 0x00060005,
539         0x9184, 0xffffffff, 0x00090008,
540         0x9188, 0xffffffff, 0x00030002,
541         0x918c, 0xffffffff, 0x00050004,
542         0x9190, 0xffffffff, 0x00000008,
543         0x9194, 0xffffffff, 0x00070006,
544         0x9198, 0xffffffff, 0x000a0009,
545         0x919c, 0xffffffff, 0x00040003,
546         0x91a0, 0xffffffff, 0x00060005,
547         0x91a4, 0xffffffff, 0x00000009,
548         0x91a8, 0xffffffff, 0x00080007,
549         0x91ac, 0xffffffff, 0x000b000a,
550         0x91b0, 0xffffffff, 0x00050004,
551         0x91b4, 0xffffffff, 0x00070006,
552         0x91b8, 0xffffffff, 0x0008000b,
553         0x91bc, 0xffffffff, 0x000a0009,
554         0x91c0, 0xffffffff, 0x000d000c,
555         0x9200, 0xffffffff, 0x00090008,
556         0x9204, 0xffffffff, 0x000b000a,
557         0x9208, 0xffffffff, 0x000c000f,
558         0x920c, 0xffffffff, 0x000e000d,
559         0x9210, 0xffffffff, 0x00110010,
560         0x9214, 0xffffffff, 0x000a0009,
561         0x9218, 0xffffffff, 0x000c000b,
562         0x921c, 0xffffffff, 0x0000000f,
563         0x9220, 0xffffffff, 0x000e000d,
564         0x9224, 0xffffffff, 0x00110010,
565         0x9228, 0xffffffff, 0x000b000a,
566         0x922c, 0xffffffff, 0x000d000c,
567         0x9230, 0xffffffff, 0x00000010,
568         0x9234, 0xffffffff, 0x000f000e,
569         0x9238, 0xffffffff, 0x00120011,
570         0x923c, 0xffffffff, 0x000c000b,
571         0x9240, 0xffffffff, 0x000e000d,
572         0x9244, 0xffffffff, 0x00000011,
573         0x9248, 0xffffffff, 0x0010000f,
574         0x924c, 0xffffffff, 0x00130012,
575         0x9250, 0xffffffff, 0x000d000c,
576         0x9254, 0xffffffff, 0x000f000e,
577         0x9258, 0xffffffff, 0x00100013,
578         0x925c, 0xffffffff, 0x00120011,
579         0x9260, 0xffffffff, 0x00150014,
580         0x9150, 0xffffffff, 0x96940200,
581         0x8708, 0xffffffff, 0x00900100,
582         0xc478, 0xffffffff, 0x00000080,
583         0xc404, 0xffffffff, 0x0020003f,
584         0x30, 0xffffffff, 0x0000001c,
585         0x34, 0x000f0000, 0x000f0000,
586         0x160c, 0xffffffff, 0x00000100,
587         0x1024, 0xffffffff, 0x00000100,
588         0x102c, 0x00000101, 0x00000000,
589         0x20a8, 0xffffffff, 0x00000104,
590         0x264c, 0x000c0000, 0x000c0000,
591         0x2648, 0x000c0000, 0x000c0000,
592         0x55e4, 0xff000fff, 0x00000100,
593         0x55e8, 0x00000001, 0x00000001,
594         0x2f50, 0x00000001, 0x00000001,
595         0x30cc, 0xc0000fff, 0x00000104,
596         0xc1e4, 0x00000001, 0x00000001,
597         0xd0c0, 0xfffffff0, 0x00000100,
598         0xd8c0, 0xfffffff0, 0x00000100
599 };
600
601 static const u32 oland_mgcg_cgcg_init[] =
602 {
603         0xc400, 0xffffffff, 0xfffffffc,
604         0x802c, 0xffffffff, 0xe0000000,
605         0x9a60, 0xffffffff, 0x00000100,
606         0x92a4, 0xffffffff, 0x00000100,
607         0xc164, 0xffffffff, 0x00000100,
608         0x9774, 0xffffffff, 0x00000100,
609         0x8984, 0xffffffff, 0x06000100,
610         0x8a18, 0xffffffff, 0x00000100,
611         0x92a0, 0xffffffff, 0x00000100,
612         0xc380, 0xffffffff, 0x00000100,
613         0x8b28, 0xffffffff, 0x00000100,
614         0x9144, 0xffffffff, 0x00000100,
615         0x8d88, 0xffffffff, 0x00000100,
616         0x8d8c, 0xffffffff, 0x00000100,
617         0x9030, 0xffffffff, 0x00000100,
618         0x9034, 0xffffffff, 0x00000100,
619         0x9038, 0xffffffff, 0x00000100,
620         0x903c, 0xffffffff, 0x00000100,
621         0xad80, 0xffffffff, 0x00000100,
622         0xac54, 0xffffffff, 0x00000100,
623         0x897c, 0xffffffff, 0x06000100,
624         0x9868, 0xffffffff, 0x00000100,
625         0x9510, 0xffffffff, 0x00000100,
626         0xaf04, 0xffffffff, 0x00000100,
627         0xae04, 0xffffffff, 0x00000100,
628         0x949c, 0xffffffff, 0x00000100,
629         0x802c, 0xffffffff, 0xe0000000,
630         0x9160, 0xffffffff, 0x00010000,
631         0x9164, 0xffffffff, 0x00030002,
632         0x9168, 0xffffffff, 0x00040007,
633         0x916c, 0xffffffff, 0x00060005,
634         0x9170, 0xffffffff, 0x00090008,
635         0x9174, 0xffffffff, 0x00020001,
636         0x9178, 0xffffffff, 0x00040003,
637         0x917c, 0xffffffff, 0x00000007,
638         0x9180, 0xffffffff, 0x00060005,
639         0x9184, 0xffffffff, 0x00090008,
640         0x9188, 0xffffffff, 0x00030002,
641         0x918c, 0xffffffff, 0x00050004,
642         0x9190, 0xffffffff, 0x00000008,
643         0x9194, 0xffffffff, 0x00070006,
644         0x9198, 0xffffffff, 0x000a0009,
645         0x919c, 0xffffffff, 0x00040003,
646         0x91a0, 0xffffffff, 0x00060005,
647         0x91a4, 0xffffffff, 0x00000009,
648         0x91a8, 0xffffffff, 0x00080007,
649         0x91ac, 0xffffffff, 0x000b000a,
650         0x91b0, 0xffffffff, 0x00050004,
651         0x91b4, 0xffffffff, 0x00070006,
652         0x91b8, 0xffffffff, 0x0008000b,
653         0x91bc, 0xffffffff, 0x000a0009,
654         0x91c0, 0xffffffff, 0x000d000c,
655         0x91c4, 0xffffffff, 0x00060005,
656         0x91c8, 0xffffffff, 0x00080007,
657         0x91cc, 0xffffffff, 0x0000000b,
658         0x91d0, 0xffffffff, 0x000a0009,
659         0x91d4, 0xffffffff, 0x000d000c,
660         0x9150, 0xffffffff, 0x96940200,
661         0x8708, 0xffffffff, 0x00900100,
662         0xc478, 0xffffffff, 0x00000080,
663         0xc404, 0xffffffff, 0x0020003f,
664         0x30, 0xffffffff, 0x0000001c,
665         0x34, 0x000f0000, 0x000f0000,
666         0x160c, 0xffffffff, 0x00000100,
667         0x1024, 0xffffffff, 0x00000100,
668         0x102c, 0x00000101, 0x00000000,
669         0x20a8, 0xffffffff, 0x00000104,
670         0x264c, 0x000c0000, 0x000c0000,
671         0x2648, 0x000c0000, 0x000c0000,
672         0x55e4, 0xff000fff, 0x00000100,
673         0x55e8, 0x00000001, 0x00000001,
674         0x2f50, 0x00000001, 0x00000001,
675         0x30cc, 0xc0000fff, 0x00000104,
676         0xc1e4, 0x00000001, 0x00000001,
677         0xd0c0, 0xfffffff0, 0x00000100,
678         0xd8c0, 0xfffffff0, 0x00000100
679 };
680
681 static u32 verde_pg_init[] =
682 {
683         0x353c, 0xffffffff, 0x40000,
684         0x3538, 0xffffffff, 0x200010ff,
685         0x353c, 0xffffffff, 0x0,
686         0x353c, 0xffffffff, 0x0,
687         0x353c, 0xffffffff, 0x0,
688         0x353c, 0xffffffff, 0x0,
689         0x353c, 0xffffffff, 0x0,
690         0x353c, 0xffffffff, 0x7007,
691         0x3538, 0xffffffff, 0x300010ff,
692         0x353c, 0xffffffff, 0x0,
693         0x353c, 0xffffffff, 0x0,
694         0x353c, 0xffffffff, 0x0,
695         0x353c, 0xffffffff, 0x0,
696         0x353c, 0xffffffff, 0x0,
697         0x353c, 0xffffffff, 0x400000,
698         0x3538, 0xffffffff, 0x100010ff,
699         0x353c, 0xffffffff, 0x0,
700         0x353c, 0xffffffff, 0x0,
701         0x353c, 0xffffffff, 0x0,
702         0x353c, 0xffffffff, 0x0,
703         0x353c, 0xffffffff, 0x0,
704         0x353c, 0xffffffff, 0x120200,
705         0x3538, 0xffffffff, 0x500010ff,
706         0x353c, 0xffffffff, 0x0,
707         0x353c, 0xffffffff, 0x0,
708         0x353c, 0xffffffff, 0x0,
709         0x353c, 0xffffffff, 0x0,
710         0x353c, 0xffffffff, 0x0,
711         0x353c, 0xffffffff, 0x1e1e16,
712         0x3538, 0xffffffff, 0x600010ff,
713         0x353c, 0xffffffff, 0x0,
714         0x353c, 0xffffffff, 0x0,
715         0x353c, 0xffffffff, 0x0,
716         0x353c, 0xffffffff, 0x0,
717         0x353c, 0xffffffff, 0x0,
718         0x353c, 0xffffffff, 0x171f1e,
719         0x3538, 0xffffffff, 0x700010ff,
720         0x353c, 0xffffffff, 0x0,
721         0x353c, 0xffffffff, 0x0,
722         0x353c, 0xffffffff, 0x0,
723         0x353c, 0xffffffff, 0x0,
724         0x353c, 0xffffffff, 0x0,
725         0x353c, 0xffffffff, 0x0,
726         0x3538, 0xffffffff, 0x9ff,
727         0x3500, 0xffffffff, 0x0,
728         0x3504, 0xffffffff, 0x10000800,
729         0x3504, 0xffffffff, 0xf,
730         0x3504, 0xffffffff, 0xf,
731         0x3500, 0xffffffff, 0x4,
732         0x3504, 0xffffffff, 0x1000051e,
733         0x3504, 0xffffffff, 0xffff,
734         0x3504, 0xffffffff, 0xffff,
735         0x3500, 0xffffffff, 0x8,
736         0x3504, 0xffffffff, 0x80500,
737         0x3500, 0xffffffff, 0x12,
738         0x3504, 0xffffffff, 0x9050c,
739         0x3500, 0xffffffff, 0x1d,
740         0x3504, 0xffffffff, 0xb052c,
741         0x3500, 0xffffffff, 0x2a,
742         0x3504, 0xffffffff, 0x1053e,
743         0x3500, 0xffffffff, 0x2d,
744         0x3504, 0xffffffff, 0x10546,
745         0x3500, 0xffffffff, 0x30,
746         0x3504, 0xffffffff, 0xa054e,
747         0x3500, 0xffffffff, 0x3c,
748         0x3504, 0xffffffff, 0x1055f,
749         0x3500, 0xffffffff, 0x3f,
750         0x3504, 0xffffffff, 0x10567,
751         0x3500, 0xffffffff, 0x42,
752         0x3504, 0xffffffff, 0x1056f,
753         0x3500, 0xffffffff, 0x45,
754         0x3504, 0xffffffff, 0x10572,
755         0x3500, 0xffffffff, 0x48,
756         0x3504, 0xffffffff, 0x20575,
757         0x3500, 0xffffffff, 0x4c,
758         0x3504, 0xffffffff, 0x190801,
759         0x3500, 0xffffffff, 0x67,
760         0x3504, 0xffffffff, 0x1082a,
761         0x3500, 0xffffffff, 0x6a,
762         0x3504, 0xffffffff, 0x1b082d,
763         0x3500, 0xffffffff, 0x87,
764         0x3504, 0xffffffff, 0x310851,
765         0x3500, 0xffffffff, 0xba,
766         0x3504, 0xffffffff, 0x891,
767         0x3500, 0xffffffff, 0xbc,
768         0x3504, 0xffffffff, 0x893,
769         0x3500, 0xffffffff, 0xbe,
770         0x3504, 0xffffffff, 0x20895,
771         0x3500, 0xffffffff, 0xc2,
772         0x3504, 0xffffffff, 0x20899,
773         0x3500, 0xffffffff, 0xc6,
774         0x3504, 0xffffffff, 0x2089d,
775         0x3500, 0xffffffff, 0xca,
776         0x3504, 0xffffffff, 0x8a1,
777         0x3500, 0xffffffff, 0xcc,
778         0x3504, 0xffffffff, 0x8a3,
779         0x3500, 0xffffffff, 0xce,
780         0x3504, 0xffffffff, 0x308a5,
781         0x3500, 0xffffffff, 0xd3,
782         0x3504, 0xffffffff, 0x6d08cd,
783         0x3500, 0xffffffff, 0x142,
784         0x3504, 0xffffffff, 0x2000095a,
785         0x3504, 0xffffffff, 0x1,
786         0x3500, 0xffffffff, 0x144,
787         0x3504, 0xffffffff, 0x301f095b,
788         0x3500, 0xffffffff, 0x165,
789         0x3504, 0xffffffff, 0xc094d,
790         0x3500, 0xffffffff, 0x173,
791         0x3504, 0xffffffff, 0xf096d,
792         0x3500, 0xffffffff, 0x184,
793         0x3504, 0xffffffff, 0x15097f,
794         0x3500, 0xffffffff, 0x19b,
795         0x3504, 0xffffffff, 0xc0998,
796         0x3500, 0xffffffff, 0x1a9,
797         0x3504, 0xffffffff, 0x409a7,
798         0x3500, 0xffffffff, 0x1af,
799         0x3504, 0xffffffff, 0xcdc,
800         0x3500, 0xffffffff, 0x1b1,
801         0x3504, 0xffffffff, 0x800,
802         0x3508, 0xffffffff, 0x6c9b2000,
803         0x3510, 0xfc00, 0x2000,
804         0x3544, 0xffffffff, 0xfc0,
805         0x28d4, 0x00000100, 0x100
806 };
807
808 static void si_init_golden_registers(struct radeon_device *rdev)
809 {
810         switch (rdev->family) {
811         case CHIP_TAHITI:
812                 radeon_program_register_sequence(rdev,
813                                                  tahiti_golden_registers,
814                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
815                 radeon_program_register_sequence(rdev,
816                                                  tahiti_golden_rlc_registers,
817                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
818                 radeon_program_register_sequence(rdev,
819                                                  tahiti_mgcg_cgcg_init,
820                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
821                 radeon_program_register_sequence(rdev,
822                                                  tahiti_golden_registers2,
823                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
824                 break;
825         case CHIP_PITCAIRN:
826                 radeon_program_register_sequence(rdev,
827                                                  pitcairn_golden_registers,
828                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
829                 radeon_program_register_sequence(rdev,
830                                                  pitcairn_golden_rlc_registers,
831                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
832                 radeon_program_register_sequence(rdev,
833                                                  pitcairn_mgcg_cgcg_init,
834                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
835                 break;
836         case CHIP_VERDE:
837                 radeon_program_register_sequence(rdev,
838                                                  verde_golden_registers,
839                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
840                 radeon_program_register_sequence(rdev,
841                                                  verde_golden_rlc_registers,
842                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
843                 radeon_program_register_sequence(rdev,
844                                                  verde_mgcg_cgcg_init,
845                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
846                 radeon_program_register_sequence(rdev,
847                                                  verde_pg_init,
848                                                  (const u32)ARRAY_SIZE(verde_pg_init));
849                 break;
850         case CHIP_OLAND:
851                 radeon_program_register_sequence(rdev,
852                                                  oland_golden_registers,
853                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
854                 radeon_program_register_sequence(rdev,
855                                                  oland_golden_rlc_registers,
856                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
857                 radeon_program_register_sequence(rdev,
858                                                  oland_mgcg_cgcg_init,
859                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
860                 break;
861         default:
862                 break;
863         }
864 }
865
866 #define PCIE_BUS_CLK                10000
867 #define TCLK                        (PCIE_BUS_CLK / 10)
868
869 /**
870  * si_get_xclk - get the xclk
871  *
872  * @rdev: radeon_device pointer
873  *
874  * Returns the reference clock used by the gfx engine
875  * (SI).
876  */
877 u32 si_get_xclk(struct radeon_device *rdev)
878 {
879         u32 reference_clock = rdev->clock.spll.reference_freq;
880         u32 tmp;
881
882         tmp = RREG32(CG_CLKPIN_CNTL_2);
883         if (tmp & MUX_TCLK_TO_XCLK)
884                 return TCLK;
885
886         tmp = RREG32(CG_CLKPIN_CNTL);
887         if (tmp & XTALIN_DIVIDE)
888                 return reference_clock / 4;
889
890         return reference_clock;
891 }
892
893 /* get temperature in millidegrees */
894 int si_get_temp(struct radeon_device *rdev)
895 {
896         u32 temp;
897         int actual_temp = 0;
898
899         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
900                 CTF_TEMP_SHIFT;
901
902         if (temp & 0x200)
903                 actual_temp = 255;
904         else
905                 actual_temp = temp & 0x1ff;
906
907         actual_temp = (actual_temp * 1000);
908
909         return actual_temp;
910 }
911
912 #define TAHITI_IO_MC_REGS_SIZE 36
913
/* {MC IO debug index, value} pairs streamed through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode() before the MC
 * ucode itself is loaded (Tahiti).
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
952
/* {MC IO debug index, value} pairs streamed through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode() before the MC
 * ucode itself is loaded (Pitcairn).
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
991
/* {MC IO debug index, value} pairs streamed through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode() before the MC
 * ucode itself is loaded (Verde).
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1030
/* {MC IO debug index, value} pairs streamed through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode() before the MC
 * ucode itself is loaded (Oland).
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1069
/* {MC IO debug index, value} pairs streamed through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode() before the MC
 * ucode itself is loaded (Hainan).
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1108
1109 /* ucode loading */
1110 static int si_mc_load_microcode(struct radeon_device *rdev)
1111 {
1112         const __be32 *fw_data;
1113         u32 running, blackout = 0;
1114         u32 *io_mc_regs;
1115         int i, ucode_size, regs_size;
1116
1117         if (!rdev->mc_fw)
1118                 return -EINVAL;
1119
1120         switch (rdev->family) {
1121         case CHIP_TAHITI:
1122                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1123                 ucode_size = SI_MC_UCODE_SIZE;
1124                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1125                 break;
1126         case CHIP_PITCAIRN:
1127                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1128                 ucode_size = SI_MC_UCODE_SIZE;
1129                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1130                 break;
1131         case CHIP_VERDE:
1132         default:
1133                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1134                 ucode_size = SI_MC_UCODE_SIZE;
1135                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1136                 break;
1137         case CHIP_OLAND:
1138                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1139                 ucode_size = OLAND_MC_UCODE_SIZE;
1140                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1141                 break;
1142         case CHIP_HAINAN:
1143                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1144                 ucode_size = OLAND_MC_UCODE_SIZE;
1145                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1146                 break;
1147         }
1148
1149         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1150
1151         if (running == 0) {
1152                 if (running) {
1153                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1154                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1155                 }
1156
1157                 /* reset the engine and set to writable */
1158                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1159                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1160
1161                 /* load mc io regs */
1162                 for (i = 0; i < regs_size; i++) {
1163                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1164                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1165                 }
1166                 /* load the MC ucode */
1167                 fw_data = (const __be32 *)rdev->mc_fw->data;
1168                 for (i = 0; i < ucode_size; i++)
1169                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1170
1171                 /* put the engine back into the active state */
1172                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1173                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1174                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1175
1176                 /* wait for training to complete */
1177                 for (i = 0; i < rdev->usec_timeout; i++) {
1178                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1179                                 break;
1180                         udelay(1);
1181                 }
1182                 for (i = 0; i < rdev->usec_timeout; i++) {
1183                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1184                                 break;
1185                         udelay(1);
1186                 }
1187
1188                 if (running)
1189                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1190         }
1191
1192         return 0;
1193 }
1194
1195 static int si_init_microcode(struct radeon_device *rdev)
1196 {
1197         struct platform_device *pdev;
1198         const char *chip_name;
1199         const char *rlc_chip_name;
1200         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1201         char fw_name[30];
1202         int err;
1203
1204         DRM_DEBUG("\n");
1205
1206         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1207         err = IS_ERR(pdev);
1208         if (err) {
1209                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1210                 return -EINVAL;
1211         }
1212
1213         switch (rdev->family) {
1214         case CHIP_TAHITI:
1215                 chip_name = "TAHITI";
1216                 rlc_chip_name = "TAHITI";
1217                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1218                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1219                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1220                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1221                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1222                 break;
1223         case CHIP_PITCAIRN:
1224                 chip_name = "PITCAIRN";
1225                 rlc_chip_name = "PITCAIRN";
1226                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1227                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1228                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1229                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1230                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1231                 break;
1232         case CHIP_VERDE:
1233                 chip_name = "VERDE";
1234                 rlc_chip_name = "VERDE";
1235                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1236                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1237                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1238                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1239                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1240                 break;
1241         case CHIP_OLAND:
1242                 chip_name = "OLAND";
1243                 rlc_chip_name = "OLAND";
1244                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1245                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1246                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1247                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1248                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1249                 break;
1250         case CHIP_HAINAN:
1251                 chip_name = "HAINAN";
1252                 rlc_chip_name = "HAINAN";
1253                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1254                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1255                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1256                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1257                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1258                 break;
1259         default: BUG();
1260         }
1261
1262         DRM_INFO("Loading %s Microcode\n", chip_name);
1263
1264         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1265         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1266         if (err)
1267                 goto out;
1268         if (rdev->pfp_fw->size != pfp_req_size) {
1269                 printk(KERN_ERR
1270                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1271                        rdev->pfp_fw->size, fw_name);
1272                 err = -EINVAL;
1273                 goto out;
1274         }
1275
1276         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1277         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1278         if (err)
1279                 goto out;
1280         if (rdev->me_fw->size != me_req_size) {
1281                 printk(KERN_ERR
1282                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1283                        rdev->me_fw->size, fw_name);
1284                 err = -EINVAL;
1285         }
1286
1287         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1288         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1289         if (err)
1290                 goto out;
1291         if (rdev->ce_fw->size != ce_req_size) {
1292                 printk(KERN_ERR
1293                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1294                        rdev->ce_fw->size, fw_name);
1295                 err = -EINVAL;
1296         }
1297
1298         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1299         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1300         if (err)
1301                 goto out;
1302         if (rdev->rlc_fw->size != rlc_req_size) {
1303                 printk(KERN_ERR
1304                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1305                        rdev->rlc_fw->size, fw_name);
1306                 err = -EINVAL;
1307         }
1308
1309         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1310         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1311         if (err)
1312                 goto out;
1313         if (rdev->mc_fw->size != mc_req_size) {
1314                 printk(KERN_ERR
1315                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1316                        rdev->mc_fw->size, fw_name);
1317                 err = -EINVAL;
1318         }
1319
1320 out:
1321         platform_device_unregister(pdev);
1322
1323         if (err) {
1324                 if (err != -EINVAL)
1325                         printk(KERN_ERR
1326                                "si_cp: Failed to load firmware \"%s\"\n",
1327                                fw_name);
1328                 release_firmware(rdev->pfp_fw);
1329                 rdev->pfp_fw = NULL;
1330                 release_firmware(rdev->me_fw);
1331                 rdev->me_fw = NULL;
1332                 release_firmware(rdev->ce_fw);
1333                 rdev->ce_fw = NULL;
1334                 release_firmware(rdev->rlc_fw);
1335                 rdev->rlc_fw = NULL;
1336                 release_firmware(rdev->mc_fw);
1337                 rdev->mc_fw = NULL;
1338         }
1339         return err;
1340 }
1341
1342 /* watermark setup */
1343 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1344                                    struct radeon_crtc *radeon_crtc,
1345                                    struct drm_display_mode *mode,
1346                                    struct drm_display_mode *other_mode)
1347 {
1348         u32 tmp;
1349         /*
1350          * Line Buffer Setup
1351          * There are 3 line buffers, each one shared by 2 display controllers.
1352          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1353          * the display controllers.  The paritioning is done via one of four
1354          * preset allocations specified in bits 21:20:
1355          *  0 - half lb
1356          *  2 - whole lb, other crtc must be disabled
1357          */
1358         /* this can get tricky if we have two large displays on a paired group
1359          * of crtcs.  Ideally for multiple large displays we'd assign them to
1360          * non-linked crtcs for maximum line buffer allocation.
1361          */
1362         if (radeon_crtc->base.enabled && mode) {
1363                 if (other_mode)
1364                         tmp = 0; /* 1/2 */
1365                 else
1366                         tmp = 2; /* whole */
1367         } else
1368                 tmp = 0;
1369
1370         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1371                DC_LB_MEMORY_CONFIG(tmp));
1372
1373         if (radeon_crtc->base.enabled && mode) {
1374                 switch (tmp) {
1375                 case 0:
1376                 default:
1377                         return 4096 * 2;
1378                 case 2:
1379                         return 8192 * 2;
1380                 }
1381         }
1382
1383         /* controller not enabled, so no lb used */
1384         return 0;
1385 }
1386
1387 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1388 {
1389         u32 tmp = RREG32(MC_SHARED_CHMAP);
1390
1391         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1392         case 0:
1393         default:
1394                 return 1;
1395         case 1:
1396                 return 2;
1397         case 2:
1398                 return 4;
1399         case 3:
1400                 return 8;
1401         case 4:
1402                 return 3;
1403         case 5:
1404                 return 6;
1405         case 6:
1406                 return 10;
1407         case 7:
1408                 return 12;
1409         case 8:
1410                 return 16;
1411         }
1412 }
1413
/* Input parameters for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1429
1430 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1431 {
1432         /* Calculate raw DRAM Bandwidth */
1433         fixed20_12 dram_efficiency; /* 0.7 */
1434         fixed20_12 yclk, dram_channels, bandwidth;
1435         fixed20_12 a;
1436
1437         a.full = dfixed_const(1000);
1438         yclk.full = dfixed_const(wm->yclk);
1439         yclk.full = dfixed_div(yclk, a);
1440         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1441         a.full = dfixed_const(10);
1442         dram_efficiency.full = dfixed_const(7);
1443         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1444         bandwidth.full = dfixed_mul(dram_channels, yclk);
1445         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1446
1447         return dfixed_trunc(bandwidth);
1448 }
1449
1450 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1451 {
1452         /* Calculate DRAM Bandwidth and the part allocated to display. */
1453         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1454         fixed20_12 yclk, dram_channels, bandwidth;
1455         fixed20_12 a;
1456
1457         a.full = dfixed_const(1000);
1458         yclk.full = dfixed_const(wm->yclk);
1459         yclk.full = dfixed_div(yclk, a);
1460         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1461         a.full = dfixed_const(10);
1462         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1463         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1464         bandwidth.full = dfixed_mul(dram_channels, yclk);
1465         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1466
1467         return dfixed_trunc(bandwidth);
1468 }
1469
1470 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1471 {
1472         /* Calculate the display Data return Bandwidth */
1473         fixed20_12 return_efficiency; /* 0.8 */
1474         fixed20_12 sclk, bandwidth;
1475         fixed20_12 a;
1476
1477         a.full = dfixed_const(1000);
1478         sclk.full = dfixed_const(wm->sclk);
1479         sclk.full = dfixed_div(sclk, a);
1480         a.full = dfixed_const(10);
1481         return_efficiency.full = dfixed_const(8);
1482         return_efficiency.full = dfixed_div(return_efficiency, a);
1483         a.full = dfixed_const(32);
1484         bandwidth.full = dfixed_mul(a, sclk);
1485         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1486
1487         return dfixed_trunc(bandwidth);
1488 }
1489
/* DMIF requests are a fixed 32 bytes; @wm is unused but kept for
 * signature symmetry with the other dce6_* helpers.
 */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1494
1495 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1496 {
1497         /* Calculate the DMIF Request Bandwidth */
1498         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1499         fixed20_12 disp_clk, sclk, bandwidth;
1500         fixed20_12 a, b1, b2;
1501         u32 min_bandwidth;
1502
1503         a.full = dfixed_const(1000);
1504         disp_clk.full = dfixed_const(wm->disp_clk);
1505         disp_clk.full = dfixed_div(disp_clk, a);
1506         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1507         b1.full = dfixed_mul(a, disp_clk);
1508
1509         a.full = dfixed_const(1000);
1510         sclk.full = dfixed_const(wm->sclk);
1511         sclk.full = dfixed_div(sclk, a);
1512         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1513         b2.full = dfixed_mul(a, sclk);
1514
1515         a.full = dfixed_const(10);
1516         disp_clk_request_efficiency.full = dfixed_const(8);
1517         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1518
1519         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1520
1521         a.full = dfixed_const(min_bandwidth);
1522         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1523
1524         return dfixed_trunc(bandwidth);
1525 }
1526
1527 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1528 {
1529         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1530         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1531         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1532         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1533
1534         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1535 }
1536
1537 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1538 {
1539         /* Calculate the display mode Average Bandwidth
1540          * DisplayMode should contain the source and destination dimensions,
1541          * timing, etc.
1542          */
1543         fixed20_12 bpp;
1544         fixed20_12 line_time;
1545         fixed20_12 src_width;
1546         fixed20_12 bandwidth;
1547         fixed20_12 a;
1548
1549         a.full = dfixed_const(1000);
1550         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1551         line_time.full = dfixed_div(line_time, a);
1552         bpp.full = dfixed_const(wm->bytes_per_pixel);
1553         src_width.full = dfixed_const(wm->src_width);
1554         bandwidth.full = dfixed_mul(src_width, bpp);
1555         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1556         bandwidth.full = dfixed_div(bandwidth, line_time);
1557
1558         return dfixed_trunc(bandwidth);
1559 }
1560
1561 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1562 {
1563         /* First calcualte the latency in ns */
1564         u32 mc_latency = 2000; /* 2000 ns. */
1565         u32 available_bandwidth = dce6_available_bandwidth(wm);
1566         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1567         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1568         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1569         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1570                 (wm->num_heads * cursor_line_pair_return_time);
1571         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1572         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1573         u32 tmp, dmif_size = 12288;
1574         fixed20_12 a, b, c;
1575
1576         if (wm->num_heads == 0)
1577                 return 0;
1578
1579         a.full = dfixed_const(2);
1580         b.full = dfixed_const(1);
1581         if ((wm->vsc.full > a.full) ||
1582             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1583             (wm->vtaps >= 5) ||
1584             ((wm->vsc.full >= a.full) && wm->interlaced))
1585                 max_src_lines_per_dst_line = 4;
1586         else
1587                 max_src_lines_per_dst_line = 2;
1588
1589         a.full = dfixed_const(available_bandwidth);
1590         b.full = dfixed_const(wm->num_heads);
1591         a.full = dfixed_div(a, b);
1592
1593         b.full = dfixed_const(mc_latency + 512);
1594         c.full = dfixed_const(wm->disp_clk);
1595         b.full = dfixed_div(b, c);
1596
1597         c.full = dfixed_const(dmif_size);
1598         b.full = dfixed_div(c, b);
1599
1600         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1601
1602         b.full = dfixed_const(1000);
1603         c.full = dfixed_const(wm->disp_clk);
1604         b.full = dfixed_div(c, b);
1605         c.full = dfixed_const(wm->bytes_per_pixel);
1606         b.full = dfixed_mul(b, c);
1607
1608         lb_fill_bw = min(tmp, dfixed_trunc(b));
1609
1610         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1611         b.full = dfixed_const(1000);
1612         c.full = dfixed_const(lb_fill_bw);
1613         b.full = dfixed_div(c, b);
1614         a.full = dfixed_div(a, b);
1615         line_fill_time = dfixed_trunc(a);
1616
1617         if (line_fill_time < wm->active_time)
1618                 return latency;
1619         else
1620                 return latency + (line_fill_time - wm->active_time);
1621
1622 }
1623
1624 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1625 {
1626         if (dce6_average_bandwidth(wm) <=
1627             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1628                 return true;
1629         else
1630                 return false;
1631 };
1632
1633 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1634 {
1635         if (dce6_average_bandwidth(wm) <=
1636             (dce6_available_bandwidth(wm) / wm->num_heads))
1637                 return true;
1638         else
1639                 return false;
1640 };
1641
1642 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1643 {
1644         u32 lb_partitions = wm->lb_size / wm->src_width;
1645         u32 line_time = wm->active_time + wm->blank_time;
1646         u32 latency_tolerant_lines;
1647         u32 latency_hiding;
1648         fixed20_12 a;
1649
1650         a.full = dfixed_const(1);
1651         if (wm->vsc.full > a.full)
1652                 latency_tolerant_lines = 1;
1653         else {
1654                 if (lb_partitions <= (wm->vtaps + 1))
1655                         latency_tolerant_lines = 1;
1656                 else
1657                         latency_tolerant_lines = 2;
1658         }
1659
1660         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1661
1662         if (dce6_latency_watermark(wm) <= latency_hiding)
1663                 return true;
1664         else
1665                 return false;
1666 }
1667
/**
 * dce6_program_watermarks - program the display watermarks (DCE6)
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size allocated to this controller
 * @num_heads: number of active display heads
 *
 * Computes latency watermarks and priority marks from the current
 * clocks and mode, then programs them into the controller's DPG and
 * PRIORITY registers. Watermark A is written as "high clocks";
 * watermark B is intended for low clocks but currently reuses the
 * same wm parameters (see the XXX-style comment below).
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* line_time is clamped to the 16-bit register field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* fill in the watermark parameters from current clocks/mode */
		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		/* ARUBA shares the evergreen DRAM channel layout */
		if (rdev->family == CHIP_ARUBA)
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A:
		 * watermark_a * (clock/1000) * hsc / 1000 / 16
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same formula with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
1777
1778 void dce6_bandwidth_update(struct radeon_device *rdev)
1779 {
1780         struct drm_display_mode *mode0 = NULL;
1781         struct drm_display_mode *mode1 = NULL;
1782         u32 num_heads = 0, lb_size;
1783         int i;
1784
1785         radeon_update_display_priority(rdev);
1786
1787         for (i = 0; i < rdev->num_crtc; i++) {
1788                 if (rdev->mode_info.crtcs[i]->base.enabled)
1789                         num_heads++;
1790         }
1791         for (i = 0; i < rdev->num_crtc; i += 2) {
1792                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1793                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1794                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1795                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1796                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1797                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1798         }
1799 }
1800
1801 /*
1802  * Core functions
1803  */
1804 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1805 {
1806         const u32 num_tile_mode_states = 32;
1807         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1808
1809         switch (rdev->config.si.mem_row_size_in_kb) {
1810         case 1:
1811                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1812                 break;
1813         case 2:
1814         default:
1815                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1816                 break;
1817         case 4:
1818                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1819                 break;
1820         }
1821
1822         if ((rdev->family == CHIP_TAHITI) ||
1823             (rdev->family == CHIP_PITCAIRN)) {
1824                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1825                         switch (reg_offset) {
1826                         case 0:  /* non-AA compressed depth or any compressed stencil */
1827                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1828                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1829                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1830                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1831                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1832                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1833                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1834                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1835                                 break;
1836                         case 1:  /* 2xAA/4xAA compressed depth only */
1837                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1838                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1839                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1840                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1841                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1842                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1843                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1844                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1845                                 break;
1846                         case 2:  /* 8xAA compressed depth only */
1847                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1848                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1849                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1850                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1851                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1852                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1853                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1854                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1855                                 break;
1856                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1857                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1858                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1859                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1860                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1861                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1862                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1863                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1864                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1865                                 break;
1866                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1867                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1868                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1869                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1870                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1871                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1872                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1873                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1874                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1875                                 break;
1876                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1877                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1878                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1879                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1880                                                  TILE_SPLIT(split_equal_to_row_size) |
1881                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1882                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1883                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1884                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1885                                 break;
1886                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1887                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1888                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1889                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1890                                                  TILE_SPLIT(split_equal_to_row_size) |
1891                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1892                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1893                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1894                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1895                                 break;
1896                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1897                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1898                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1899                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1900                                                  TILE_SPLIT(split_equal_to_row_size) |
1901                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1902                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1903                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1904                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1905                                 break;
1906                         case 8:  /* 1D and 1D Array Surfaces */
1907                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1908                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1909                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1910                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1911                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1912                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1913                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1914                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1915                                 break;
1916                         case 9:  /* Displayable maps. */
1917                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1918                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1919                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1920                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1921                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1922                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1923                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1924                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1925                                 break;
1926                         case 10:  /* Display 8bpp. */
1927                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1928                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1929                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1930                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1931                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1932                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1933                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1934                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1935                                 break;
1936                         case 11:  /* Display 16bpp. */
1937                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1938                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1939                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1940                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1941                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1942                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1943                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1944                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1945                                 break;
1946                         case 12:  /* Display 32bpp. */
1947                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1948                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1949                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1950                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1951                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1952                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1953                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1954                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1955                                 break;
1956                         case 13:  /* Thin. */
1957                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1958                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1959                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1960                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1961                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1962                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1963                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1964                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1965                                 break;
1966                         case 14:  /* Thin 8 bpp. */
1967                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1968                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1969                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1970                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1971                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1972                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1974                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1975                                 break;
1976                         case 15:  /* Thin 16 bpp. */
1977                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1978                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1979                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1980                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1981                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1982                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1983                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1984                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1985                                 break;
1986                         case 16:  /* Thin 32 bpp. */
1987                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1988                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1989                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1990                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1991                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1992                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1993                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1994                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1995                                 break;
1996                         case 17:  /* Thin 64 bpp. */
1997                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1999                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2000                                                  TILE_SPLIT(split_equal_to_row_size) |
2001                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2002                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2003                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2004                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2005                                 break;
2006                         case 21:  /* 8 bpp PRT. */
2007                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2008                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2009                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2010                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2011                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2012                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2013                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2014                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2015                                 break;
2016                         case 22:  /* 16 bpp PRT */
2017                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2018                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2019                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2020                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2021                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2022                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2023                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2024                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2025                                 break;
2026                         case 23:  /* 32 bpp PRT */
2027                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2028                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2029                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2030                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2031                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2032                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2033                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2034                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2035                                 break;
2036                         case 24:  /* 64 bpp PRT */
2037                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2038                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2039                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2040                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2041                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2042                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2043                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2044                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2045                                 break;
2046                         case 25:  /* 128 bpp PRT */
2047                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2049                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2050                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2051                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2052                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2053                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2054                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2055                                 break;
2056                         default:
2057                                 gb_tile_moden = 0;
2058                                 break;
2059                         }
2060                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2061                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2062                 }
2063         } else if ((rdev->family == CHIP_VERDE) ||
2064                    (rdev->family == CHIP_OLAND) ||
2065                    (rdev->family == CHIP_HAINAN)) {
2066                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2067                         switch (reg_offset) {
2068                         case 0:  /* non-AA compressed depth or any compressed stencil */
2069                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2070                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2071                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2072                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2073                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2074                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2075                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2076                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2077                                 break;
2078                         case 1:  /* 2xAA/4xAA compressed depth only */
2079                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2080                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2081                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2082                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2083                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2084                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2085                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2086                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2087                                 break;
2088                         case 2:  /* 8xAA compressed depth only */
2089                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2090                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2091                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2092                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2093                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2094                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2095                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2096                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2097                                 break;
2098                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2099                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2100                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2101                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2102                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2103                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2104                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2105                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2106                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2107                                 break;
2108                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2109                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2110                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2111                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2112                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2113                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2114                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2115                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2116                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2117                                 break;
2118                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2119                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2120                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2121                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2122                                                  TILE_SPLIT(split_equal_to_row_size) |
2123                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2124                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2125                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2126                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2127                                 break;
2128                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2129                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2131                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132                                                  TILE_SPLIT(split_equal_to_row_size) |
2133                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2134                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2135                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2136                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2137                                 break;
2138                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2139                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2140                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2141                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2142                                                  TILE_SPLIT(split_equal_to_row_size) |
2143                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2144                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2145                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2146                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2147                                 break;
2148                         case 8:  /* 1D and 1D Array Surfaces */
2149                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2150                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2152                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2153                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2154                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2155                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2156                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2157                                 break;
2158                         case 9:  /* Displayable maps. */
2159                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2160                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2161                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2162                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2163                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2164                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2166                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2167                                 break;
2168                         case 10:  /* Display 8bpp. */
2169                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2170                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2171                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2172                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2173                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2174                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2176                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2177                                 break;
2178                         case 11:  /* Display 16bpp. */
2179                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2180                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2181                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2182                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2183                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2184                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2186                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2187                                 break;
2188                         case 12:  /* Display 32bpp. */
2189                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2191                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2192                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2193                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2194                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2196                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2197                                 break;
2198                         case 13:  /* Thin. */
2199                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2200                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2201                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2202                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2203                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2204                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2205                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2206                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2207                                 break;
2208                         case 14:  /* Thin 8 bpp. */
2209                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2210                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2211                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2212                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2213                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2214                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2215                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2216                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2217                                 break;
2218                         case 15:  /* Thin 16 bpp. */
2219                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2221                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2222                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2223                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2224                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2226                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2227                                 break;
2228                         case 16:  /* Thin 32 bpp. */
2229                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2231                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2232                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2233                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2234                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2235                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2236                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2237                                 break;
2238                         case 17:  /* Thin 64 bpp. */
2239                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2241                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2242                                                  TILE_SPLIT(split_equal_to_row_size) |
2243                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2244                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2245                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2246                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2247                                 break;
2248                         case 21:  /* 8 bpp PRT. */
2249                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2251                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2252                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2253                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2254                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2255                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2257                                 break;
2258                         case 22:  /* 16 bpp PRT */
2259                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2261                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2262                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2263                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2264                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2267                                 break;
2268                         case 23:  /* 32 bpp PRT */
2269                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2271                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2272                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2273                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2274                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2276                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2277                                 break;
2278                         case 24:  /* 64 bpp PRT */
2279                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2281                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2283                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2284                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2286                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2287                                 break;
2288                         case 25:  /* 128 bpp PRT */
2289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2291                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2293                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2294                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2296                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2297                                 break;
2298                         default:
2299                                 gb_tile_moden = 0;
2300                                 break;
2301                         }
2302                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2303                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2304                 }
2305         } else
2306                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2307 }
2308
2309 static void si_select_se_sh(struct radeon_device *rdev,
2310                             u32 se_num, u32 sh_num)
2311 {
2312         u32 data = INSTANCE_BROADCAST_WRITES;
2313
2314         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2315                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2316         else if (se_num == 0xffffffff)
2317                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2318         else if (sh_num == 0xffffffff)
2319                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2320         else
2321                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2322         WREG32(GRBM_GFX_INDEX, data);
2323 }
2324
2325 static u32 si_create_bitmask(u32 bit_width)
2326 {
2327         u32 i, mask = 0;
2328
2329         for (i = 0; i < bit_width; i++) {
2330                 mask <<= 1;
2331                 mask |= 1;
2332         }
2333         return mask;
2334 }
2335
2336 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2337 {
2338         u32 data, mask;
2339
2340         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2341         if (data & 1)
2342                 data &= INACTIVE_CUS_MASK;
2343         else
2344                 data = 0;
2345         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2346
2347         data >>= INACTIVE_CUS_SHIFT;
2348
2349         mask = si_create_bitmask(cu_per_sh);
2350
2351         return ~data & mask;
2352 }
2353
2354 static void si_setup_spi(struct radeon_device *rdev,
2355                          u32 se_num, u32 sh_per_se,
2356                          u32 cu_per_sh)
2357 {
2358         int i, j, k;
2359         u32 data, mask, active_cu;
2360
2361         for (i = 0; i < se_num; i++) {
2362                 for (j = 0; j < sh_per_se; j++) {
2363                         si_select_se_sh(rdev, i, j);
2364                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2365                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2366
2367                         mask = 1;
2368                         for (k = 0; k < 16; k++) {
2369                                 mask <<= k;
2370                                 if (active_cu & mask) {
2371                                         data &= ~mask;
2372                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2373                                         break;
2374                                 }
2375                         }
2376                 }
2377         }
2378         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2379 }
2380
2381 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2382                               u32 max_rb_num, u32 se_num,
2383                               u32 sh_per_se)
2384 {
2385         u32 data, mask;
2386
2387         data = RREG32(CC_RB_BACKEND_DISABLE);
2388         if (data & 1)
2389                 data &= BACKEND_DISABLE_MASK;
2390         else
2391                 data = 0;
2392         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2393
2394         data >>= BACKEND_DISABLE_SHIFT;
2395
2396         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2397
2398         return data & mask;
2399 }
2400
/**
 * si_setup_rb - program the raster config for the enabled render backends
 * @rdev: radeon device structure
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @max_rb_num: total number of render backends on the asic
 *
 * Builds a global bitmap of usable RBs from the per-SE/SH disable fuses,
 * then programs PA_SC_RASTER_CONFIG on each SE so screen-space packers
 * map only onto RBs that are actually present.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather the disabled-RB bits of every SE/SH into one packed
	 * bitmap, TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert the bitmap: enabled_rbs has a bit set per usable RB. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Program each SE's raster config, consuming two enabled-RB bits
	 * per shader array to pick the RB_MAP variant for that SH. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs of this SH usable (or none reported) */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* Restore broadcast so later register writes reach all SEs/SHs. */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2448
2449 static void si_gpu_init(struct radeon_device *rdev)
2450 {
2451         u32 gb_addr_config = 0;
2452         u32 mc_shared_chmap, mc_arb_ramcfg;
2453         u32 sx_debug_1;
2454         u32 hdp_host_path_cntl;
2455         u32 tmp;
2456         int i, j;
2457
2458         switch (rdev->family) {
2459         case CHIP_TAHITI:
2460                 rdev->config.si.max_shader_engines = 2;
2461                 rdev->config.si.max_tile_pipes = 12;
2462                 rdev->config.si.max_cu_per_sh = 8;
2463                 rdev->config.si.max_sh_per_se = 2;
2464                 rdev->config.si.max_backends_per_se = 4;
2465                 rdev->config.si.max_texture_channel_caches = 12;
2466                 rdev->config.si.max_gprs = 256;
2467                 rdev->config.si.max_gs_threads = 32;
2468                 rdev->config.si.max_hw_contexts = 8;
2469
2470                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2471                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2472                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2473                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2474                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2475                 break;
2476         case CHIP_PITCAIRN:
2477                 rdev->config.si.max_shader_engines = 2;
2478                 rdev->config.si.max_tile_pipes = 8;
2479                 rdev->config.si.max_cu_per_sh = 5;
2480                 rdev->config.si.max_sh_per_se = 2;
2481                 rdev->config.si.max_backends_per_se = 4;
2482                 rdev->config.si.max_texture_channel_caches = 8;
2483                 rdev->config.si.max_gprs = 256;
2484                 rdev->config.si.max_gs_threads = 32;
2485                 rdev->config.si.max_hw_contexts = 8;
2486
2487                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2488                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2489                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2490                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2491                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2492                 break;
2493         case CHIP_VERDE:
2494         default:
2495                 rdev->config.si.max_shader_engines = 1;
2496                 rdev->config.si.max_tile_pipes = 4;
2497                 rdev->config.si.max_cu_per_sh = 2;
2498                 rdev->config.si.max_sh_per_se = 2;
2499                 rdev->config.si.max_backends_per_se = 4;
2500                 rdev->config.si.max_texture_channel_caches = 4;
2501                 rdev->config.si.max_gprs = 256;
2502                 rdev->config.si.max_gs_threads = 32;
2503                 rdev->config.si.max_hw_contexts = 8;
2504
2505                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2506                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2507                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2508                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2509                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2510                 break;
2511         case CHIP_OLAND:
2512                 rdev->config.si.max_shader_engines = 1;
2513                 rdev->config.si.max_tile_pipes = 4;
2514                 rdev->config.si.max_cu_per_sh = 6;
2515                 rdev->config.si.max_sh_per_se = 1;
2516                 rdev->config.si.max_backends_per_se = 2;
2517                 rdev->config.si.max_texture_channel_caches = 4;
2518                 rdev->config.si.max_gprs = 256;
2519                 rdev->config.si.max_gs_threads = 16;
2520                 rdev->config.si.max_hw_contexts = 8;
2521
2522                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2523                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2524                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2525                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2526                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2527                 break;
2528         case CHIP_HAINAN:
2529                 rdev->config.si.max_shader_engines = 1;
2530                 rdev->config.si.max_tile_pipes = 4;
2531                 rdev->config.si.max_cu_per_sh = 5;
2532                 rdev->config.si.max_sh_per_se = 1;
2533                 rdev->config.si.max_backends_per_se = 1;
2534                 rdev->config.si.max_texture_channel_caches = 2;
2535                 rdev->config.si.max_gprs = 256;
2536                 rdev->config.si.max_gs_threads = 16;
2537                 rdev->config.si.max_hw_contexts = 8;
2538
2539                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2540                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2541                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2542                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2543                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2544                 break;
2545         }
2546
2547         /* Initialize HDP */
2548         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2549                 WREG32((0x2c14 + j), 0x00000000);
2550                 WREG32((0x2c18 + j), 0x00000000);
2551                 WREG32((0x2c1c + j), 0x00000000);
2552                 WREG32((0x2c20 + j), 0x00000000);
2553                 WREG32((0x2c24 + j), 0x00000000);
2554         }
2555
2556         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2557
2558         evergreen_fix_pci_max_read_req_size(rdev);
2559
2560         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2561
2562         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2563         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2564
2565         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2566         rdev->config.si.mem_max_burst_length_bytes = 256;
2567         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2568         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2569         if (rdev->config.si.mem_row_size_in_kb > 4)
2570                 rdev->config.si.mem_row_size_in_kb = 4;
2571         /* XXX use MC settings? */
2572         rdev->config.si.shader_engine_tile_size = 32;
2573         rdev->config.si.num_gpus = 1;
2574         rdev->config.si.multi_gpu_tile_size = 64;
2575
2576         /* fix up row size */
2577         gb_addr_config &= ~ROW_SIZE_MASK;
2578         switch (rdev->config.si.mem_row_size_in_kb) {
2579         case 1:
2580         default:
2581                 gb_addr_config |= ROW_SIZE(0);
2582                 break;
2583         case 2:
2584                 gb_addr_config |= ROW_SIZE(1);
2585                 break;
2586         case 4:
2587                 gb_addr_config |= ROW_SIZE(2);
2588                 break;
2589         }
2590
2591         /* setup tiling info dword.  gb_addr_config is not adequate since it does
2592          * not have bank info, so create a custom tiling dword.
2593          * bits 3:0   num_pipes
2594          * bits 7:4   num_banks
2595          * bits 11:8  group_size
2596          * bits 15:12 row_size
2597          */
2598         rdev->config.si.tile_config = 0;
2599         switch (rdev->config.si.num_tile_pipes) {
2600         case 1:
2601                 rdev->config.si.tile_config |= (0 << 0);
2602                 break;
2603         case 2:
2604                 rdev->config.si.tile_config |= (1 << 0);
2605                 break;
2606         case 4:
2607                 rdev->config.si.tile_config |= (2 << 0);
2608                 break;
2609         case 8:
2610         default:
2611                 /* XXX what about 12? */
2612                 rdev->config.si.tile_config |= (3 << 0);
2613                 break;
2614         }       
2615         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
2616         case 0: /* four banks */
2617                 rdev->config.si.tile_config |= 0 << 4;
2618                 break;
2619         case 1: /* eight banks */
2620                 rdev->config.si.tile_config |= 1 << 4;
2621                 break;
2622         case 2: /* sixteen banks */
2623         default:
2624                 rdev->config.si.tile_config |= 2 << 4;
2625                 break;
2626         }
2627         rdev->config.si.tile_config |=
2628                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2629         rdev->config.si.tile_config |=
2630                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2631
2632         WREG32(GB_ADDR_CONFIG, gb_addr_config);
2633         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
2634         WREG32(DMIF_ADDR_CALC, gb_addr_config);
2635         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2636         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
2637         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
2638         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2639         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2640         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2641
2642         si_tiling_mode_table_init(rdev);
2643
2644         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
2645                     rdev->config.si.max_sh_per_se,
2646                     rdev->config.si.max_backends_per_se);
2647
2648         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
2649                      rdev->config.si.max_sh_per_se,
2650                      rdev->config.si.max_cu_per_sh);
2651
2652
2653         /* set HW defaults for 3D engine */
2654         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
2655                                      ROQ_IB2_START(0x2b)));
2656         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2657
2658         sx_debug_1 = RREG32(SX_DEBUG_1);
2659         WREG32(SX_DEBUG_1, sx_debug_1);
2660
2661         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2662
2663         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
2664                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
2665                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
2666                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
2667
2668         WREG32(VGT_NUM_INSTANCES, 1);
2669
2670         WREG32(CP_PERFMON_CNTL, 0);
2671
2672         WREG32(SQ_CONFIG, 0);
2673
2674         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2675                                           FORCE_EOV_MAX_REZ_CNT(255)));
2676
2677         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2678                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2679
2680         WREG32(VGT_GS_VERTEX_REUSE, 16);
2681         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2682
2683         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
2684         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
2685         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
2686         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
2687         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
2688         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
2689         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
2690         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
2691
2692         tmp = RREG32(HDP_MISC_CNTL);
2693         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2694         WREG32(HDP_MISC_CNTL, tmp);
2695
2696         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2697         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2698
2699         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2700
2701         udelay(50);
2702 }
2703
2704 /*
2705  * GPU scratch registers helpers function.
2706  */
2707 static void si_scratch_init(struct radeon_device *rdev)
2708 {
2709         int i;
2710
2711         rdev->scratch.num_reg = 7;
2712         rdev->scratch.reg_base = SCRATCH_REG0;
2713         for (i = 0; i < rdev->scratch.num_reg; i++) {
2714                 rdev->scratch.free[i] = true;
2715                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2716         }
2717 }
2718
/**
 * si_fence_ring_emit - emit a fence on the gfx/compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the GART read caches (SET_CONFIG_REG of CP_COHER_CNTL2 to 0
 * followed by a full-range SURFACE_SYNC), then emits an EVENT_WRITE_EOP
 * packet that writes the fence sequence number to the fence driver's
 * GPU address and raises an interrupt when the caches have been flushed
 * and invalidated.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	/* invalidate L1 texture, TC, and shader instruction/constant caches */
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	/* full address range for the surface sync */
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
2745
2746 /*
2747  * IB stuff
2748  */
/**
 * si_ring_ib_execute - emit an indirect buffer on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB to schedule on the ring
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet referencing @ib on the IB's ring.  For non-const IBs a
 * next_rptr update is written first (via the rptr save register or
 * writeback memory) and the GART read caches are flushed afterwards
 * for the IB's VM id.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_CONFIG_REG write, 4 for the
			 * IB packet below, 8 for the trailing cache flush */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA packet, 4 + 8 as above */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* (1 << 8) selects the write destination/engine */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* IB size in dwords, with the VM id in bits 31:24 */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
2805
2806 /*
2807  * CP.
2808  */
2809 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2810 {
2811         if (enable)
2812                 WREG32(CP_ME_CNTL, 0);
2813         else {
2814                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2815                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2816                 WREG32(SCRATCH_UMSK, 0);
2817                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2818                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2819                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2820         }
2821         udelay(50);
2822 }
2823
2824 static int si_cp_load_microcode(struct radeon_device *rdev)
2825 {
2826         const __be32 *fw_data;
2827         int i;
2828
2829         if (!rdev->me_fw || !rdev->pfp_fw)
2830                 return -EINVAL;
2831
2832         si_cp_enable(rdev, false);
2833
2834         /* PFP */
2835         fw_data = (const __be32 *)rdev->pfp_fw->data;
2836         WREG32(CP_PFP_UCODE_ADDR, 0);
2837         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
2838                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2839         WREG32(CP_PFP_UCODE_ADDR, 0);
2840
2841         /* CE */
2842         fw_data = (const __be32 *)rdev->ce_fw->data;
2843         WREG32(CP_CE_UCODE_ADDR, 0);
2844         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
2845                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2846         WREG32(CP_CE_UCODE_ADDR, 0);
2847
2848         /* ME */
2849         fw_data = (const __be32 *)rdev->me_fw->data;
2850         WREG32(CP_ME_RAM_WADDR, 0);
2851         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
2852                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2853         WREG32(CP_ME_RAM_WADDR, 0);
2854
2855         WREG32(CP_PFP_UCODE_ADDR, 0);
2856         WREG32(CP_CE_UCODE_ADDR, 0);
2857         WREG32(CP_ME_RAM_WADDR, 0);
2858         WREG32(CP_ME_RAM_RADDR, 0);
2859         return 0;
2860 }
2861
2862 static int si_cp_start(struct radeon_device *rdev)
2863 {
2864         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2865         int r, i;
2866
2867         r = radeon_ring_lock(rdev, ring, 7 + 4);
2868         if (r) {
2869                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2870                 return r;
2871         }
2872         /* init the CP */
2873         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
2874         radeon_ring_write(ring, 0x1);
2875         radeon_ring_write(ring, 0x0);
2876         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
2877         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
2878         radeon_ring_write(ring, 0);
2879         radeon_ring_write(ring, 0);
2880
2881         /* init the CE partitions */
2882         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2883         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2884         radeon_ring_write(ring, 0xc000);
2885         radeon_ring_write(ring, 0xe000);
2886         radeon_ring_unlock_commit(rdev, ring);
2887
2888         si_cp_enable(rdev, true);
2889
2890         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
2891         if (r) {
2892                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2893                 return r;
2894         }
2895
2896         /* setup clear context state */
2897         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2898         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2899
2900         for (i = 0; i < si_default_size; i++)
2901                 radeon_ring_write(ring, si_default_state[i]);
2902
2903         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2904         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2905
2906         /* set clear context state */
2907         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2908         radeon_ring_write(ring, 0);
2909
2910         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2911         radeon_ring_write(ring, 0x00000316);
2912         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2913         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2914
2915         radeon_ring_unlock_commit(rdev, ring);
2916
2917         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
2918                 ring = &rdev->ring[i];
2919                 r = radeon_ring_lock(rdev, ring, 2);
2920
2921                 /* clear the compute context state */
2922                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
2923                 radeon_ring_write(ring, 0);
2924
2925                 radeon_ring_unlock_commit(rdev, ring);
2926         }
2927
2928         return 0;
2929 }
2930
2931 static void si_cp_fini(struct radeon_device *rdev)
2932 {
2933         struct radeon_ring *ring;
2934         si_cp_enable(rdev, false);
2935
2936         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2937         radeon_ring_fini(rdev, ring);
2938         radeon_scratch_free(rdev, ring->rptr_save_reg);
2939
2940         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2941         radeon_ring_fini(rdev, ring);
2942         radeon_scratch_free(rdev, ring->rptr_save_reg);
2943
2944         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2945         radeon_ring_fini(rdev, ring);
2946         radeon_scratch_free(rdev, ring->rptr_save_reg);
2947 }
2948
/**
 * si_cp_resume - soft reset the CP and program the ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP block (and PA/VGT/SPI/SX with it), then programs
 * the three CP ring buffers: ring 0 (gfx + compute) and rings 1/2
 * (compute only).  Starts the rings via si_cp_start() and ring tests
 * each of them.  The gfx ring must pass its test or an error is
 * returned; a failing compute ring is merely marked not ready.
 * Returns 0 on success, negative error code on gfx ring test failure.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: disable rptr updates to memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: mark everything not ready */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3084
/**
 * si_gpu_check_soft_reset - decide which engines need a soft reset
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA and VM L2 status registers and translates
 * busy/pending bits into a mask of RADEON_RESET_* flags.  An MC reset
 * request is filtered out at the end since a busy MC is usually just
 * busy, not hung.
 * Returns the accumulated reset mask (0 means nothing looks hung).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3165
/**
 * si_gpu_soft_reset - soft reset the engines selected by @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags (from si_gpu_check_soft_reset())
 *
 * Dumps status registers, halts the CP and DMA engines, stops the MC,
 * then pulses the appropriate GRBM/SRBM soft-reset bits and resumes
 * the MC.  A no-op when @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* park the MC before touching the reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* build the GRBM reset mask: a gfx/compute/CP hang takes the
	 * whole 3D pipeline down with it */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, wait, clear; the reads post
	 * the writes to the hardware */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3290
3291 int si_asic_reset(struct radeon_device *rdev)
3292 {
3293         u32 reset_mask;
3294
3295         reset_mask = si_gpu_check_soft_reset(rdev);
3296
3297         if (reset_mask)
3298                 r600_set_bios_scratch_engine_hung(rdev, true);
3299
3300         si_gpu_soft_reset(rdev, reset_mask);
3301
3302         reset_mask = si_gpu_check_soft_reset(rdev);
3303
3304         if (!reset_mask)
3305                 r600_set_bios_scratch_engine_hung(rdev, false);
3306
3307         return 0;
3308 }
3309
3310 /**
3311  * si_gfx_is_lockup - Check if the GFX engine is locked up
3312  *
3313  * @rdev: radeon_device pointer
3314  * @ring: radeon_ring structure holding ring information
3315  *
3316  * Check if the GFX engine is locked up.
3317  * Returns true if the engine appears to be locked up, false if not.
3318  */
3319 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3320 {
3321         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3322
3323         if (!(reset_mask & (RADEON_RESET_GFX |
3324                             RADEON_RESET_COMPUTE |
3325                             RADEON_RESET_CP))) {
3326                 radeon_ring_lockup_update(ring);
3327                 return false;
3328         }
3329         /* force CP activities */
3330         radeon_ring_force_activity(rdev, ring);
3331         return radeon_ring_test_lockup(rdev, ring);
3332 }
3333
3334 /**
3335  * si_dma_is_lockup - Check if the DMA engine is locked up
3336  *
3337  * @rdev: radeon_device pointer
3338  * @ring: radeon_ring structure holding ring information
3339  *
3340  * Check if the async DMA engine is locked up.
3341  * Returns true if the engine appears to be locked up, false if not.
3342  */
3343 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3344 {
3345         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3346         u32 mask;
3347
3348         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3349                 mask = RADEON_RESET_DMA;
3350         else
3351                 mask = RADEON_RESET_DMA1;
3352
3353         if (!(reset_mask & mask)) {
3354                 radeon_ring_lockup_update(ring);
3355                 return false;
3356         }
3357         /* force ring activities */
3358         radeon_ring_force_activity(rdev, ring);
3359         return radeon_ring_test_lockup(rdev, ring);
3360 }
3361
/* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Zeroes the HDP tiling registers, stops the MC clients, programs the
 * system aperture and FB location registers to match the driver's view
 * of VRAM, then restores the MC clients and disables the VGA renderer
 * so the GPU owns VRAM.
 */
static void si_mc_program(struct radeon_device *rdev)
{
        struct evergreen_mc_save save;
        u32 tmp;
        int i, j;

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }
        WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

        /* stop MC clients and wait for idle before reprogramming apertures */
        evergreen_mc_stop(rdev, &save);
        if (radeon_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }
        if (!ASIC_IS_NODCE(rdev))
                /* Lockout access through VGA aperture*/
                WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
        /* Update configuration */
        WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
               rdev->mc.vram_start >> 12);
        WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
               rdev->mc.vram_end >> 12);
        /* out-of-aperture accesses are redirected to the scratch page */
        WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
               rdev->vram_scratch.gpu_addr >> 12);
        /* FB location: top 16 bits = end >> 24, bottom 16 = start >> 24 */
        tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
        tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
        WREG32(MC_VM_FB_LOCATION, tmp);
        /* XXX double check these! */
        WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
        WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
        WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
        /* AGP is unused on SI: base 0, top < bot disables the aperture */
        WREG32(MC_VM_AGP_BASE, 0);
        WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
        WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
        if (radeon_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }
        evergreen_mc_resume(rdev, &save);
        if (!ASIC_IS_NODCE(rdev)) {
                /* we need to own VRAM, so turn off the VGA renderer here
                 * to stop it overwriting our objects */
                rv515_vga_render_disable(rdev);
        }
}
3413
3414 static void si_vram_gtt_location(struct radeon_device *rdev,
3415                                  struct radeon_mc *mc)
3416 {
3417         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3418                 /* leave room for at least 1024M GTT */
3419                 dev_warn(rdev->dev, "limiting VRAM\n");
3420                 mc->real_vram_size = 0xFFC0000000ULL;
3421                 mc->mc_vram_size = 0xFFC0000000ULL;
3422         }
3423         radeon_vram_location(rdev, &rdev->mc, 0);
3424         rdev->mc.gtt_base_align = 0;
3425         radeon_gtt_location(rdev, mc);
3426 }
3427
3428 static int si_mc_init(struct radeon_device *rdev)
3429 {
3430         u32 tmp;
3431         int chansize, numchan;
3432
3433         /* Get VRAM informations */
3434         rdev->mc.vram_is_ddr = true;
3435         tmp = RREG32(MC_ARB_RAMCFG);
3436         if (tmp & CHANSIZE_OVERRIDE) {
3437                 chansize = 16;
3438         } else if (tmp & CHANSIZE_MASK) {
3439                 chansize = 64;
3440         } else {
3441                 chansize = 32;
3442         }
3443         tmp = RREG32(MC_SHARED_CHMAP);
3444         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3445         case 0:
3446         default:
3447                 numchan = 1;
3448                 break;
3449         case 1:
3450                 numchan = 2;
3451                 break;
3452         case 2:
3453                 numchan = 4;
3454                 break;
3455         case 3:
3456                 numchan = 8;
3457                 break;
3458         case 4:
3459                 numchan = 3;
3460                 break;
3461         case 5:
3462                 numchan = 6;
3463                 break;
3464         case 6:
3465                 numchan = 10;
3466                 break;
3467         case 7:
3468                 numchan = 12;
3469                 break;
3470         case 8:
3471                 numchan = 16;
3472                 break;
3473         }
3474         rdev->mc.vram_width = numchan * chansize;
3475         /* Could aper size report 0 ? */
3476         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3477         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3478         /* size in MB on si */
3479         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3480         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3481         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3482         si_vram_gtt_location(rdev, &rdev->mc);
3483         radeon_update_bandwidth_info(rdev);
3484
3485         return 0;
3486 }
3487
3488 /*
3489  * GART
3490  */
3491 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3492 {
3493         /* flush hdp cache */
3494         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3495
3496         /* bits 0-15 are the VM contexts0-15 */
3497         WREG32(VM_INVALIDATE_REQUEST, 1);
3498 }
3499
/**
 * si_pcie_gart_enable - set up the PCIE GART and VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * configures VM context 0 as the system GART context and contexts
 * 1-15 for per-process VMs, then flushes the TLB.
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
        int r, i;

        if (rdev->gart.robj == NULL) {
                dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
                return -EINVAL;
        }
        r = radeon_gart_table_vram_pin(rdev);
        if (r)
                return r;
        /* re-write the current GART entries into the freshly pinned table */
        radeon_gart_restore(rdev);
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL,
               (0xA << 7) | /* magic value from the AMD init sequence - TODO confirm meaning */
               ENABLE_L1_TLB |
               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               ENABLE_ADVANCED_DRIVER_MODEL |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(0));
        /* setup context0 */
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        /* faults in context0 are redirected to the dummy page */
        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT0_CNTL2, 0);
        WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
                                  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

        /* undocumented registers cleared by the AMD init sequence - TODO confirm */
        WREG32(0x15D4, 0);
        WREG32(0x15D8, 0);
        WREG32(0x15DC, 0);

        /* empty context1-15 */
        /* set vm size, must be a multiple of 4 */
        WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
        WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
        /* Assign the pt base to something valid for now; the pts used for
         * the VMs are determined by the application and setup and assigned
         * on the fly in the vm part of radeon_gart.c
         */
        for (i = 1; i < 16; i++) {
                /* contexts 0-7 and 8-15 live in two separate register banks */
                if (i < 8)
                        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
                               rdev->gart.table_addr >> 12);
                else
                        WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
                               rdev->gart.table_addr >> 12);
        }

        /* enable context1-15 */
        WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
               (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT1_CNTL2, 4);
        /* one-level page tables, interrupt + default-page on every fault type */
        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
                                VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
                                READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                READ_PROTECTION_FAULT_ENABLE_DEFAULT |
                                WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

        si_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
                 (unsigned)(rdev->mc.gtt_size >> 20),
                 (unsigned long long)rdev->gart.table_addr);
        rdev->gart.ready = true;
        return 0;
}
3584
/**
 * si_pcie_gart_disable - tear down the PCIE GART hardware state
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, reprograms the TLB and L2 cache with
 * caching disabled, and unpins the GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
        /* Disable all tables */
        WREG32(VM_CONTEXT0_CNTL, 0);
        WREG32(VM_CONTEXT1_CNTL, 0);
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache (note: ENABLE_L2_CACHE deliberately not set) */
        WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, 0);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(0));
        radeon_gart_table_vram_unpin(rdev);
}
3603
/**
 * si_pcie_gart_fini - final GART teardown
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the page table VRAM object and
 * releases the GART bookkeeping structures, in that order.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
        si_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
3610
3611 /* vm parser */
3612 static bool si_vm_reg_valid(u32 reg)
3613 {
3614         /* context regs are fine */
3615         if (reg >= 0x28000)
3616                 return true;
3617
3618         /* check config regs */
3619         switch (reg) {
3620         case GRBM_GFX_INDEX:
3621         case CP_STRMOUT_CNTL:
3622         case VGT_VTX_VECT_EJECT_REG:
3623         case VGT_CACHE_INVALIDATION:
3624         case VGT_ESGS_RING_SIZE:
3625         case VGT_GSVS_RING_SIZE:
3626         case VGT_GS_VERTEX_REUSE:
3627         case VGT_PRIMITIVE_TYPE:
3628         case VGT_INDEX_TYPE:
3629         case VGT_NUM_INDICES:
3630         case VGT_NUM_INSTANCES:
3631         case VGT_TF_RING_SIZE:
3632         case VGT_HS_OFFCHIP_PARAM:
3633         case VGT_TF_MEMORY_BASE:
3634         case PA_CL_ENHANCE:
3635         case PA_SU_LINE_STIPPLE_VALUE:
3636         case PA_SC_LINE_STIPPLE_STATE:
3637         case PA_SC_ENHANCE:
3638         case SQC_CACHES:
3639         case SPI_STATIC_THREAD_MGMT_1:
3640         case SPI_STATIC_THREAD_MGMT_2:
3641         case SPI_STATIC_THREAD_MGMT_3:
3642         case SPI_PS_MAX_WAVE_ID:
3643         case SPI_CONFIG_CNTL:
3644         case SPI_CONFIG_CNTL_1:
3645         case TA_CNTL_AUX:
3646                 return true;
3647         default:
3648                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3649                 return false;
3650         }
3651 }
3652
3653 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
3654                                   u32 *ib, struct radeon_cs_packet *pkt)
3655 {
3656         switch (pkt->opcode) {
3657         case PACKET3_NOP:
3658         case PACKET3_SET_BASE:
3659         case PACKET3_SET_CE_DE_COUNTERS:
3660         case PACKET3_LOAD_CONST_RAM:
3661         case PACKET3_WRITE_CONST_RAM:
3662         case PACKET3_WRITE_CONST_RAM_OFFSET:
3663         case PACKET3_DUMP_CONST_RAM:
3664         case PACKET3_INCREMENT_CE_COUNTER:
3665         case PACKET3_WAIT_ON_DE_COUNTER:
3666         case PACKET3_CE_WRITE:
3667                 break;
3668         default:
3669                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
3670                 return -EINVAL;
3671         }
3672         return 0;
3673 }
3674
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: parsed packet header (pkt->idx points at the header dword)
 *
 * Most opcodes are allowed outright; packets that can write registers
 * (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW, SET_CONFIG_REG, CP_DMA)
 * have their destination registers checked against si_vm_reg_valid().
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
                                   u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;         /* first dword after the header */
        u32 idx_value = ib[idx];
        u32 start_reg, end_reg, reg, i;
        u32 command, info;

        switch (pkt->opcode) {
        /* these opcodes need no operand validation */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_INDEX_BUFFER_SIZE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_DRAW_INDIRECT:
        case PACKET3_DRAW_INDEX_INDIRECT:
        case PACKET3_INDEX_BASE:
        case PACKET3_DRAW_INDEX_2:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_INDEX_TYPE:
        case PACKET3_DRAW_INDIRECT_MULTI:
        case PACKET3_DRAW_INDEX_AUTO:
        case PACKET3_DRAW_INDEX_IMMD:
        case PACKET3_NUM_INSTANCES:
        case PACKET3_DRAW_INDEX_MULTI_AUTO:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_DRAW_INDEX_OFFSET_2:
        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
        case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
        case PACKET3_MPEG_INDEX:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* 0xf00 appears to select a register destination - TODO confirm
                 * against the PM4 spec */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        /* 0x10000 presumably = one-register (no-increment) mode */
                        if (idx_value & 0x10000) {
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* payload is count-2 dwords; validate each target */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* bit 8 set: write target is a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* bit 1 set: destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_SET_CONFIG_REG:
                /* range-check the whole register run, then each register */
                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
                end_reg = 4 * pkt->count + start_reg - 4;
                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
                        return -EINVAL;
                }
                for (i = 0; i < pkt->count; i++) {
                        reg = start_reg + (4 * i);
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_CP_DMA:
                command = ib[idx + 4];
                info = ib[idx + 1];
                if (command & PACKET3_CP_DMA_CMD_SAS) {
                        /* src address space is register */
                        if (((info & 0x60000000) >> 29) == 0) {
                                start_reg = idx_value << 2;
                                if (command & PACKET3_CP_DMA_CMD_SAIC) {
                                        /* no-increment: single source register */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad SRC register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        /* low 21 bits of command = byte count */
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad SRC register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                if (command & PACKET3_CP_DMA_CMD_DAS) {
                        /* dst address space is register */
                        if (((info & 0x00300000) >> 20) == 0) {
                                start_reg = ib[idx + 2];
                                if (command & PACKET3_CP_DMA_CMD_DAIC) {
                                        /* no-increment: single destination register */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad DST register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad DST register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                break;
        default:
                DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
3833
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: parsed packet header (pkt->idx points at the header dword)
 *
 * Same scheme as si_vm_packet3_gfx_check() but with the smaller set
 * of opcodes valid on the compute rings; register-writing packets
 * are checked against si_vm_reg_valid().
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
                                       u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;         /* first dword after the header */
        u32 idx_value = ib[idx];
        u32 start_reg, reg, i;

        switch (pkt->opcode) {
        /* these opcodes need no operand validation */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* 0xf00 appears to select a register destination - TODO confirm
                 * against the PM4 spec */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        /* 0x10000 presumably = one-register (no-increment) mode */
                        if (idx_value & 0x10000) {
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* payload is count-2 dwords; validate each target */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* bit 8 set: write target is a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* bit 1 set: destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
3915
/**
 * si_ib_parse - validate all packets in a VM indirect buffer
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks the IB dword by dword, dispatching each PACKET3 to the
 * CE / GFX / compute checker depending on the target ring.  Type-0
 * packets and unknown packet types are rejected.
 * Returns 0 if the whole IB is valid, -EINVAL otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
        int ret = 0;
        u32 idx = 0;
        struct radeon_cs_packet pkt;

        do {
                pkt.idx = idx;
                pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
                pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
                pkt.one_reg_wr = 0;
                switch (pkt.type) {
                case RADEON_PACKET_TYPE0:
                        /* raw register writes are never allowed from a VM IB */
                        dev_err(rdev->dev, "Packet0 not allowed!\n");
                        ret = -EINVAL;
                        break;
                case RADEON_PACKET_TYPE2:
                        /* type-2 is a one-dword filler/NOP */
                        idx += 1;
                        break;
                case RADEON_PACKET_TYPE3:
                        pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
                        if (ib->is_const_ib)
                                ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
                        else {
                                switch (ib->ring) {
                                case RADEON_RING_TYPE_GFX_INDEX:
                                        ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
                                        break;
                                case CAYMAN_RING_TYPE_CP1_INDEX:
                                case CAYMAN_RING_TYPE_CP2_INDEX:
                                        ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
                                        break;
                                default:
                                        dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
                                        ret = -EINVAL;
                                        break;
                                }
                        }
                        /* header + count+1 payload dwords */
                        idx += pkt.count + 2;
                        break;
                default:
                        dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
                        ret = -EINVAL;
                        break;
                }
                if (ret)
                        break;
        } while (idx < ib->length_dw);

        return ret;
}
3967
3968 /*
3969  * vm
3970  */
3971 int si_vm_init(struct radeon_device *rdev)
3972 {
3973         /* number of VMs */
3974         rdev->vm_manager.nvm = 16;
3975         /* base offset of vram pages */
3976         rdev->vm_manager.vram_base_offset = 0;
3977
3978         return 0;
3979 }
3980
/**
 * si_vm_fini - tear down the VM manager
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do on SI; provided to satisfy the asic interface.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
3984
3985 /**
3986  * si_vm_set_page - update the page tables using the CP
3987  *
3988  * @rdev: radeon_device pointer
3989  * @ib: indirect buffer to fill with commands
3990  * @pe: addr of the page entry
3991  * @addr: dst addr to write into pe
3992  * @count: number of page entries to update
3993  * @incr: increase next addr by incr bytes
3994  * @flags: access flags
3995  *
3996  * Update the page tables using the CP (SI).
3997  */
3998 void si_vm_set_page(struct radeon_device *rdev,
3999                     struct radeon_ib *ib,
4000                     uint64_t pe,
4001                     uint64_t addr, unsigned count,
4002                     uint32_t incr, uint32_t flags)
4003 {
4004         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4005         uint64_t value;
4006         unsigned ndw;
4007
4008         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4009                 while (count) {
4010                         ndw = 2 + count * 2;
4011                         if (ndw > 0x3FFE)
4012                                 ndw = 0x3FFE;
4013
4014                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4015                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4016                                         WRITE_DATA_DST_SEL(1));
4017                         ib->ptr[ib->length_dw++] = pe;
4018                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4019                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4020                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4021                                         value = radeon_vm_map_gart(rdev, addr);
4022                                         value &= 0xFFFFFFFFFFFFF000ULL;
4023                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4024                                         value = addr;
4025                                 } else {
4026                                         value = 0;
4027                                 }
4028                                 addr += incr;
4029                                 value |= r600_flags;
4030                                 ib->ptr[ib->length_dw++] = value;
4031                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4032                         }
4033                 }
4034         } else {
4035                 /* DMA */
4036                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4037                         while (count) {
4038                                 ndw = count * 2;
4039                                 if (ndw > 0xFFFFE)
4040                                         ndw = 0xFFFFE;
4041
4042                                 /* for non-physically contiguous pages (system) */
4043                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4044                                 ib->ptr[ib->length_dw++] = pe;
4045                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4046                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4047                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4048                                                 value = radeon_vm_map_gart(rdev, addr);
4049                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4050                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4051                                                 value = addr;
4052                                         } else {
4053                                                 value = 0;
4054                                         }
4055                                         addr += incr;
4056                                         value |= r600_flags;
4057                                         ib->ptr[ib->length_dw++] = value;
4058                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4059                                 }
4060                         }
4061                 } else {
4062                         while (count) {
4063                                 ndw = count * 2;
4064                                 if (ndw > 0xFFFFE)
4065                                         ndw = 0xFFFFE;
4066
4067                                 if (flags & RADEON_VM_PAGE_VALID)
4068                                         value = addr;
4069                                 else
4070                                         value = 0;
4071                                 /* for physically contiguous pages (vram) */
4072                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4073                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4074                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4075                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4076                                 ib->ptr[ib->length_dw++] = 0;
4077                                 ib->ptr[ib->length_dw++] = value; /* value */
4078                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4079                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4080                                 ib->ptr[ib->length_dw++] = 0;
4081                                 pe += ndw * 4;
4082                                 addr += (ndw / 2) * incr;
4083                                 count -= ndw / 2;
4084                         }
4085                 }
4086                 while (ib->length_dw & 0x7)
4087                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4088         }
4089 }
4090
/**
 * si_vm_flush - flush the TLB for a VM via the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: vm to flush for (may be NULL, in which case nothing is done)
 *
 * Emits CP WRITE_DATA packets that update the page directory base
 * address for the VM's context, flush the HDP cache and request a
 * TLB invalidate for that context, then syncs PFP to ME so stale
 * PFP-side reads cannot slip past the flush.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4133
/**
 * si_dma_vm_flush - flush the TLB for a VM via the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to emit the flush on
 * @vm: vm to flush for (may be NULL, in which case nothing is done)
 *
 * Same operation as si_vm_flush() but expressed as DMA SRBM_WRITE
 * packets: update the page directory base for the VM's context,
 * flush the HDP cache and request a TLB invalidate.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7
	 * NOTE(review): vm->id can be up to 15 here (see the 0-7/8-15 split
	 * above and the "0-15" wording in si_vm_flush()); this range looks
	 * like a stale copy from an older asic — verify against the SI docs.
	 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4159
4160 /*
4161  * RLC
4162  */
4163 void si_rlc_fini(struct radeon_device *rdev)
4164 {
4165         int r;
4166
4167         /* save restore block */
4168         if (rdev->rlc.save_restore_obj) {
4169                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4170                 if (unlikely(r != 0))
4171                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
4172                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
4173                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4174
4175                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
4176                 rdev->rlc.save_restore_obj = NULL;
4177         }
4178
4179         /* clear state block */
4180         if (rdev->rlc.clear_state_obj) {
4181                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4182                 if (unlikely(r != 0))
4183                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
4184                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
4185                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4186
4187                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
4188                 rdev->rlc.clear_state_obj = NULL;
4189         }
4190 }
4191
/**
 * si_rlc_init - allocate and pin the RLC buffer objects
 *
 * @rdev: radeon_device pointer
 *
 * Creates (if not already created) and pins the save/restore and
 * clear-state buffer objects in VRAM, recording their GPU addresses
 * for si_rlc_resume() to program into the RLC.
 * Returns 0 on success, a negative error code otherwise; on failures
 * after the first object exists, partial state is torn down via
 * si_rlc_fini().
 */
int si_rlc_init(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.save_restore_obj);
		if (r) {
			/* nothing allocated yet, no cleanup needed */
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.save_restore_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.clear_state_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
			si_rlc_fini(rdev);
			return r;
		}
	}
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.clear_state_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	return 0;
}
4248
/**
 * si_rlc_stop - halt the RLC (RunList Controller)
 *
 * @rdev: radeon_device pointer
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);
}
4253
/**
 * si_rlc_start - enable the RLC (RunList Controller)
 *
 * @rdev: radeon_device pointer
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);
}
4258
/**
 * si_rlc_resume - reprogram the RLC and restart it
 *
 * @rdev: radeon_device pointer
 *
 * Stops the RLC, clears its ring-list and load-balancing state,
 * programs the save/restore and clear-state buffer addresses set up
 * by si_rlc_init(), loads the RLC microcode and starts the RLC again.
 * Returns 0 on success, -EINVAL if the RLC firmware is not loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	/* reset ring-list / load-balancer state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);

	/* buffer addresses are programmed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* load the microcode: the firmware image is big-endian 32-bit words */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_rlc_start(rdev);

	return 0;
}
4292
4293 static void si_enable_interrupts(struct radeon_device *rdev)
4294 {
4295         u32 ih_cntl = RREG32(IH_CNTL);
4296         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4297
4298         ih_cntl |= ENABLE_INTR;
4299         ih_rb_cntl |= IH_RB_ENABLE;
4300         WREG32(IH_CNTL, ih_cntl);
4301         WREG32(IH_RB_CNTL, ih_rb_cntl);
4302         rdev->ih.enabled = true;
4303 }
4304
4305 static void si_disable_interrupts(struct radeon_device *rdev)
4306 {
4307         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4308         u32 ih_cntl = RREG32(IH_CNTL);
4309
4310         ih_rb_cntl &= ~IH_RB_ENABLE;
4311         ih_cntl &= ~ENABLE_INTR;
4312         WREG32(IH_RB_CNTL, ih_rb_cntl);
4313         WREG32(IH_CNTL, ih_cntl);
4314         /* set rptr, wptr to 0 */
4315         WREG32(IH_RB_RPTR, 0);
4316         WREG32(IH_RB_WPTR, 0);
4317         rdev->ih.enabled = false;
4318         rdev->ih.rptr = 0;
4319 }
4320
/**
 * si_disable_interrupt_state - program every interrupt source to disabled
 *
 * @rdev: radeon_device pointer
 *
 * Clears the enable bits for the three CP rings, both DMA engines,
 * GRBM, each populated CRTC's vblank and pageflip interrupts and (on
 * asics with display hardware) the hot plug detect lines.  Only the
 * HPD polarity bits are preserved.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask off DMA trap interrupts on both engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline interrupts, per populated crtc pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* hot plug detect: keep only the polarity setting */
	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
4376
/**
 * si_irq_init - initialize interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, brings up the RLC, configures the IH ring
 * buffer (base, size, writeback address), forces all interrupt
 * sources to disabled and finally enables the IH.  Interrupt sources
 * are enabled later via si_irq_set().
 * Returns 0 on success, a negative error code otherwise.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4447
/**
 * si_irq_set - program the interrupt enables from the current irq state
 *
 * @rdev: radeon_device pointer
 *
 * Builds enable masks for the three CP rings, both DMA engines, each
 * populated CRTC (vblank/pageflip) and the six hot plug detect lines
 * from the software state in rdev->irq, then writes them all to the
 * hardware in one pass.  If the IH is disabled, every source is
 * forced off instead.
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current HPD settings with the enable bit cleared */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank: enabled for either a vblank waiter or a pending pageflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* write all the assembled masks to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	return 0;
}
4605
4606 static inline void si_irq_ack(struct radeon_device *rdev)
4607 {
4608         u32 tmp;
4609
4610         if (ASIC_IS_NODCE(rdev))
4611                 return;
4612
4613         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4614         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4615         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4616         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4617         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4618         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4619         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4620         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4621         if (rdev->num_crtc >= 4) {
4622                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4623                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4624         }
4625         if (rdev->num_crtc >= 6) {
4626                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4627                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4628         }
4629
4630         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4631                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4632         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4633                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4634         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4635                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4636         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4637                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4638         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4639                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4640         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4641                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4642
4643         if (rdev->num_crtc >= 4) {
4644                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4645                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4646                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4647                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4648                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4649                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4650                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4651                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4652                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4653                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4654                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4655                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4656         }
4657
4658         if (rdev->num_crtc >= 6) {
4659                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4660                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4661                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4662                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4663                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4664                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4665                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4666                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4667                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4668                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4669                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4670                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4671         }
4672
4673         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4674                 tmp = RREG32(DC_HPD1_INT_CONTROL);
4675                 tmp |= DC_HPDx_INT_ACK;
4676                 WREG32(DC_HPD1_INT_CONTROL, tmp);
4677         }
4678         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4679                 tmp = RREG32(DC_HPD2_INT_CONTROL);
4680                 tmp |= DC_HPDx_INT_ACK;
4681                 WREG32(DC_HPD2_INT_CONTROL, tmp);
4682         }
4683         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4684                 tmp = RREG32(DC_HPD3_INT_CONTROL);
4685                 tmp |= DC_HPDx_INT_ACK;
4686                 WREG32(DC_HPD3_INT_CONTROL, tmp);
4687         }
4688         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4689                 tmp = RREG32(DC_HPD4_INT_CONTROL);
4690                 tmp |= DC_HPDx_INT_ACK;
4691                 WREG32(DC_HPD4_INT_CONTROL, tmp);
4692         }
4693         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4694                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4695                 tmp |= DC_HPDx_INT_ACK;
4696                 WREG32(DC_HPD5_INT_CONTROL, tmp);
4697         }
4698         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4699                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4700                 tmp |= DC_HPDx_INT_ACK;
4701                 WREG32(DC_HPD6_INT_CONTROL, tmp);
4702         }
4703 }
4704
/**
 * si_irq_disable - disable interrupt delivery and quiesce pending irqs
 *
 * @rdev: radeon_device pointer
 *
 * Disables the IH ring, waits 1ms for in-flight interrupts to land,
 * acks anything still pending and forces every source to disabled.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
4713
/**
 * si_irq_suspend - disable interrupt support for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disables all interrupt sources and stops the RLC; the IH ring
 * itself is kept allocated for resume.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
4719
/**
 * si_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Suspends the interrupt hardware and frees the IH ring.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
4725
4726 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4727 {
4728         u32 wptr, tmp;
4729
4730         if (rdev->wb.enabled)
4731                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4732         else
4733                 wptr = RREG32(IH_RB_WPTR);
4734
4735         if (wptr & RB_OVERFLOW) {
4736                 /* When a ring buffer overflow happen start parsing interrupt
4737                  * from the last not overwritten vector (wptr + 16). Hopefully
4738                  * this should allow us to catchup.
4739                  */
4740                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4741                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4742                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4743                 tmp = RREG32(IH_RB_CNTL);
4744                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4745                 WREG32(IH_RB_CNTL, tmp);
4746         }
4747         return (wptr & rdev->ih.ptr_mask);
4748 }
4749
/*        SI IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 */
/**
 * si_irq_process - walk the IH ring and dispatch pending interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Decodes each 128-bit IV entry between the saved read pointer and the
 * current write pointer and dispatches on the interrupt source id:
 * display vblank/vline for crtcs 1-6, HPD hotplug, VM protection
 * faults, CP/DMA fence completions and GUI idle.  After draining the
 * ring it re-reads the write pointer and restarts if new entries
 * arrived while processing.
 *
 * Returns IRQ_HANDLED if entries were processed, IRQ_NONE if the IH is
 * disabled, the device is shutting down, or another thread already
 * holds the processing lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM protection fault (read / write) */
		case 147:
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* the RINGID field of the IV entry selects which CP ring
			 * completed */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
5066
5067 /**
5068  * si_copy_dma - copy pages using the DMA engine
5069  *
5070  * @rdev: radeon_device pointer
5071  * @src_offset: src GPU address
5072  * @dst_offset: dst GPU address
5073  * @num_gpu_pages: number of GPU pages to xfer
5074  * @fence: radeon fence object
5075  *
5076  * Copy GPU paging using the DMA engine (SI).
5077  * Used by the radeon ttm implementation to move pages if
5078  * registered as the asic copy callback.
5079  */
5080 int si_copy_dma(struct radeon_device *rdev,
5081                 uint64_t src_offset, uint64_t dst_offset,
5082                 unsigned num_gpu_pages,
5083                 struct radeon_fence **fence)
5084 {
5085         struct radeon_semaphore *sem = NULL;
5086         int ring_index = rdev->asic->copy.dma_ring_index;
5087         struct radeon_ring *ring = &rdev->ring[ring_index];
5088         u32 size_in_bytes, cur_size_in_bytes;
5089         int i, num_loops;
5090         int r = 0;
5091
5092         r = radeon_semaphore_create(rdev, &sem);
5093         if (r) {
5094                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5095                 return r;
5096         }
5097
5098         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5099         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5100         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5101         if (r) {
5102                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5103                 radeon_semaphore_free(rdev, &sem, NULL);
5104                 return r;
5105         }
5106
5107         if (radeon_fence_need_sync(*fence, ring->idx)) {
5108                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5109                                             ring->idx);
5110                 radeon_fence_note_sync(*fence, ring->idx);
5111         } else {
5112                 radeon_semaphore_free(rdev, &sem, NULL);
5113         }
5114
5115         for (i = 0; i < num_loops; i++) {
5116                 cur_size_in_bytes = size_in_bytes;
5117                 if (cur_size_in_bytes > 0xFFFFF)
5118                         cur_size_in_bytes = 0xFFFFF;
5119                 size_in_bytes -= cur_size_in_bytes;
5120                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5121                 radeon_ring_write(ring, dst_offset & 0xffffffff);
5122                 radeon_ring_write(ring, src_offset & 0xffffffff);
5123                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5124                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5125                 src_offset += cur_size_in_bytes;
5126                 dst_offset += cur_size_in_bytes;
5127         }
5128
5129         r = radeon_fence_emit(rdev, fence, ring->idx);
5130         if (r) {
5131                 radeon_ring_unlock_undo(rdev, ring);
5132                 return r;
5133         }
5134
5135         radeon_ring_unlock_commit(rdev, ring);
5136         radeon_semaphore_free(rdev, &sem, *fence);
5137
5138         return r;
5139 }
5140
5141 /*
5142  * startup/shutdown callbacks
5143  */
/**
 * si_startup - program the asic and bring up the rings
 *
 * @rdev: radeon_device pointer
 *
 * Loads firmware if not already present, programs the MC and GART,
 * initializes RLC/writeback/fence state, enables interrupts, and
 * brings up the GFX ring, both compute CP rings, both DMA rings and
 * (optionally) UVD.  Called from si_init() and si_resume(); the
 * bring-up order below is load-bearing.
 *
 * Returns 0 on success, negative error code on failure.  A UVD
 * failure is non-fatal: its ring size is zeroed so it stays disabled.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* make sure all of the required microcode images are loaded */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on each ring before the rings are used */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is best-effort: on any failure just disable its ring */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD was disabled above or never set up */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
5309
5310 int si_resume(struct radeon_device *rdev)
5311 {
5312         int r;
5313
5314         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
5315          * posting will perform necessary task to bring back GPU into good
5316          * shape.
5317          */
5318         /* post card */
5319         atom_asic_init(rdev->mode_info.atom_context);
5320
5321         /* init golden registers */
5322         si_init_golden_registers(rdev);
5323
5324         rdev->accel_working = true;
5325         r = si_startup(rdev);
5326         if (r) {
5327                 DRM_ERROR("si startup failed on resume\n");
5328                 rdev->accel_working = false;
5329                 return r;
5330         }
5331
5332         return r;
5333
5334 }
5335
/**
 * si_suspend - disable the asic for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Tears the engines down roughly in the reverse of the si_startup()
 * bring-up order: VM manager, CP, DMA, UVD, then interrupts and
 * writeback, with the GART disabled last.  Keep this ordering.
 *
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	r600_uvd_rbc_stop(rdev);
	radeon_uvd_suspend(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5348
5349 /* Plan is to move initialization in that function and use
5350  * helper function so that radeon_device_init pretty much
5351  * do nothing more than calling asic specific function. This
5352  * should also allow to remove a bunch of callback function
5353  * like vram_info.
5354  */
5355 int si_init(struct radeon_device *rdev)
5356 {
5357         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5358         int r;
5359
5360         /* Read BIOS */
5361         if (!radeon_get_bios(rdev)) {
5362                 if (ASIC_IS_AVIVO(rdev))
5363                         return -EINVAL;
5364         }
5365         /* Must be an ATOMBIOS */
5366         if (!rdev->is_atom_bios) {
5367                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5368                 return -EINVAL;
5369         }
5370         r = radeon_atombios_init(rdev);
5371         if (r)
5372                 return r;
5373
5374         /* Post card if necessary */
5375         if (!radeon_card_posted(rdev)) {
5376                 if (!rdev->bios) {
5377                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5378                         return -EINVAL;
5379                 }
5380                 DRM_INFO("GPU not posted. posting now...\n");
5381                 atom_asic_init(rdev->mode_info.atom_context);
5382         }
5383         /* init golden registers */
5384         si_init_golden_registers(rdev);
5385         /* Initialize scratch registers */
5386         si_scratch_init(rdev);
5387         /* Initialize surface registers */
5388         radeon_surface_init(rdev);
5389         /* Initialize clocks */
5390         radeon_get_clock_info(rdev->ddev);
5391
5392         /* Fence driver */
5393         r = radeon_fence_driver_init(rdev);
5394         if (r)
5395                 return r;
5396
5397         /* initialize memory controller */
5398         r = si_mc_init(rdev);
5399         if (r)
5400                 return r;
5401         /* Memory manager */
5402         r = radeon_bo_init(rdev);
5403         if (r)
5404                 return r;
5405
5406         r = radeon_irq_kms_init(rdev);
5407         if (r)
5408                 return r;
5409
5410         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5411         ring->ring_obj = NULL;
5412         r600_ring_init(rdev, ring, 1024 * 1024);
5413
5414         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5415         ring->ring_obj = NULL;
5416         r600_ring_init(rdev, ring, 1024 * 1024);
5417
5418         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5419         ring->ring_obj = NULL;
5420         r600_ring_init(rdev, ring, 1024 * 1024);
5421
5422         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5423         ring->ring_obj = NULL;
5424         r600_ring_init(rdev, ring, 64 * 1024);
5425
5426         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5427         ring->ring_obj = NULL;
5428         r600_ring_init(rdev, ring, 64 * 1024);
5429
5430         r = radeon_uvd_init(rdev);
5431         if (!r) {
5432                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5433                 ring->ring_obj = NULL;
5434                 r600_ring_init(rdev, ring, 4096);
5435         }
5436
5437         rdev->ih.ring_obj = NULL;
5438         r600_ih_ring_init(rdev, 64 * 1024);
5439
5440         r = r600_pcie_gart_init(rdev);
5441         if (r)
5442                 return r;
5443
5444         rdev->accel_working = true;
5445         r = si_startup(rdev);
5446         if (r) {
5447                 dev_err(rdev->dev, "disabling GPU acceleration\n");
5448                 si_cp_fini(rdev);
5449                 cayman_dma_fini(rdev);
5450                 si_irq_fini(rdev);
5451                 si_rlc_fini(rdev);
5452                 radeon_wb_fini(rdev);
5453                 radeon_ib_pool_fini(rdev);
5454                 radeon_vm_manager_fini(rdev);
5455                 radeon_irq_kms_fini(rdev);
5456                 si_pcie_gart_fini(rdev);
5457                 rdev->accel_working = false;
5458         }
5459
5460         /* Don't start up if the MC ucode is missing.
5461          * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
5463          */
5464         if (!rdev->mc_fw) {
5465                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5466                 return -EINVAL;
5467         }
5468
5469         return 0;
5470 }
5471
/**
 * si_fini - tear down SI asic specific driver state
 *
 * @rdev: radeon_device pointer
 *
 * Shuts down the acceleration engines (CP, DMA, interrupts, RLC,
 * writeback, VM manager, IB pool, IRQ KMS, UVD, PCIE GART), then
 * releases the common state (VRAM scratch, GEM, fence driver, BO
 * manager, atombios) and frees the cached BIOS image.
 *
 * NOTE: the teardown order mirrors the error-unwind path in si_init()
 * and matters — engines must be quiesced before the GART and memory
 * manager go away.  Do not reorder these calls.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* BIOS image was kmalloc'd at init time; release and clear the
	 * pointer so a stale reference cannot be used after teardown.
	 */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
5492
5493 /**
5494  * si_get_gpu_clock_counter - return GPU clock counter snapshot
5495  *
5496  * @rdev: radeon_device pointer
5497  *
5498  * Fetches a GPU clock counter snapshot (SI).
5499  * Returns the 64 bit clock counter snapshot.
5500  */
5501 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5502 {
5503         uint64_t clock;
5504
5505         mutex_lock(&rdev->gpu_clock_mutex);
5506         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5507         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5508                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5509         mutex_unlock(&rdev->gpu_clock_mutex);
5510         return clock;
5511 }
5512
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL) for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (pass 0 to put the PLL to sleep)
 * @dclk: requested UVD decode clock (pass 0 to put the PLL to sleep)
 *
 * Bypasses VCLK/DCLK onto the bus clock, reprograms the UPLL with
 * dividers computed by radeon_uvd_calc_upll_dividers(), waits for the
 * PLL to settle and lock, then switches VCLK/DCLK back to the PLL
 * outputs.  The register-write order and mdelay()s follow the hardware
 * programming sequence and must not be reordered.
 *
 * Returns 0 on success, negative error code on failure (divider
 * calculation or PLL control request).
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* wait for the PLL control request to be accepted */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): ISPARE9 appears to select a PLL tuning setting
	 * based on the feedback-divider range — confirm against the AMD
	 * UPLL programming sequence.
	 */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}