/* drm/radeon: fill in GPU init for Hainan (v2) */
/* linux-2.6-block.git / drivers/gpu/drm/radeon/si.c */
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"

/* Microcode image sizes for the SI (Southern Islands) firmware blocks:
 * PFP (prefetch parser), PM4/ME (micro engine), CE (constant engine),
 * RLC (run list controller) and MC (memory controller).
 * NOTE(review): presumably sizes are in dwords, matching the other radeon
 * ucode loaders — confirm against the firmware load code.
 */
#define SI_PFP_UCODE_SIZE 2144
#define SI_PM4_UCODE_SIZE 2144
#define SI_CE_UCODE_SIZE 2144
#define SI_RLC_UCODE_SIZE 2048
#define SI_MC_UCODE_SIZE 7769
/* Oland ships a larger MC firmware image than the other SI parts. */
#define OLAND_MC_UCODE_SIZE 7863
42
/* Firmware binaries requested at runtime for each supported SI ASIC
 * (Tahiti, Pitcairn, Verde, Oland): pfp, me, ce, mc and rlc images.
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63
/* Helpers implemented in the r600/evergreen code and reused by SI.
 * NOTE(review): these would normally live in a shared header rather than
 * being re-declared here — kept as-is to match the existing file layout.
 */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
72
/* Tahiti RLC golden settings: {register, mask, value} triples consumed by
 * radeon_program_register_sequence() (see si_init_golden_registers()).
 */
static const u32 tahiti_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4,
        0xf4a8, 0xffffffff, 0x00000000
};
82
/* Tahiti golden register settings: {register, mask, value} triples applied
 * at init via radeon_program_register_sequence(). Values come from the
 * hardware team; do not edit individual entries by hand.
 */
static const u32 tahiti_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x277c, 0x00000003, 0x000007ff,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x00000200, 0x000002fb,
        0xac10, 0xffffffff, 0x0000543b,
        0xac0c, 0xffffffff, 0xa9210876,
        0x88d0, 0xffffffff, 0x000fff40,
        0x88d4, 0x0000001f, 0x00000010,
        0x1410, 0x20000000, 0x20fffed8,
        0x15c0, 0x000c0fc0, 0x000c0400
};
117
/* Additional Tahiti-only golden setting ({register, mask, value}). */
static const u32 tahiti_golden_registers2[] =
{
        0xc64, 0x00000001, 0x00000001
};
122
/* Pitcairn RLC golden settings ({register, mask, value} triples). */
static const u32 pitcairn_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601004,
        0xc47c, 0xffffffff, 0x10102020,
        0xc488, 0xffffffff, 0x01000020,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000a4
};
131
/* Pitcairn golden register settings ({register, mask, value} triples). */
static const u32 pitcairn_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f7,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x32761054,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};
162
/* Verde RLC golden settings ({register, mask, value} triples). */
static const u32 verde_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x033f1005,
        0xc47c, 0xffffffff, 0x10808020,
        0xc488, 0xffffffff, 0x00800008,
        0xc314, 0xffffffff, 0x00001000,
        0xc30c, 0xffffffff, 0x80010014
};
171
/* Verde golden register settings ({register, mask, value} triples).
 * Several entries are repeated verbatim (e.g. 0xd030, 0x2ae4, 0x8a14);
 * the duplicates are harmless — each later write is identical — and are
 * kept as-is to match the vendor-supplied table.
 */
static const u32 verde_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};
227
/* Oland RLC golden settings ({register, mask, value} triples). */
static const u32 oland_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4
};
236
/* Oland golden register settings ({register, mask, value} triples). */
static const u32 oland_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00003210,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};
267
/* Tahiti MGCG/CGCG (medium-grain / coarse-grain clock gating) init sequence,
 * {register, mask, value} triples applied via
 * radeon_program_register_sequence() (see si_init_golden_registers()).
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x91d8, 0xffffffff, 0x00070006,
        0x91dc, 0xffffffff, 0x00090008,
        0x91e0, 0xffffffff, 0x0000000c,
        0x91e4, 0xffffffff, 0x000b000a,
        0x91e8, 0xffffffff, 0x000e000d,
        0x91ec, 0xffffffff, 0x00080007,
        0x91f0, 0xffffffff, 0x000a0009,
        0x91f4, 0xffffffff, 0x0000000d,
        0x91f8, 0xffffffff, 0x000c000b,
        0x91fc, 0xffffffff, 0x000f000e,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9264, 0xffffffff, 0x000e000d,
        0x9268, 0xffffffff, 0x0010000f,
        0x926c, 0xffffffff, 0x00000013,
        0x9270, 0xffffffff, 0x00120011,
        0x9274, 0xffffffff, 0x00150014,
        0x9278, 0xffffffff, 0x000f000e,
        0x927c, 0xffffffff, 0x00110010,
        0x9280, 0xffffffff, 0x00000014,
        0x9284, 0xffffffff, 0x00130012,
        0x9288, 0xffffffff, 0x00160015,
        0x928c, 0xffffffff, 0x0010000f,
        0x9290, 0xffffffff, 0x00120011,
        0x9294, 0xffffffff, 0x00000015,
        0x9298, 0xffffffff, 0x00140013,
        0x929c, 0xffffffff, 0x00170016,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
397
/* Pitcairn MGCG/CGCG clock-gating init sequence — presumably the same
 * {register, mask, value} triple layout as tahiti_mgcg_cgcg_init (its use
 * is outside this chunk; verify against si_init_golden_registers()).
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
495
/* Verde MGCG/CGCG clock-gating init sequence — presumably the same
 * {register, mask, value} triple layout as tahiti_mgcg_cgcg_init (its use
 * is outside this chunk; verify against si_init_golden_registers()).
 */
static const u32 verde_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
595
/* Oland MGCG/CGCG clock-gating init sequence — presumably the same
 * {register, mask, value} triple layout as tahiti_mgcg_cgcg_init (its use
 * is outside this chunk; verify against si_init_golden_registers()).
 */
static const u32 oland_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
675
676 static u32 verde_pg_init[] =
677 {
678         0x353c, 0xffffffff, 0x40000,
679         0x3538, 0xffffffff, 0x200010ff,
680         0x353c, 0xffffffff, 0x0,
681         0x353c, 0xffffffff, 0x0,
682         0x353c, 0xffffffff, 0x0,
683         0x353c, 0xffffffff, 0x0,
684         0x353c, 0xffffffff, 0x0,
685         0x353c, 0xffffffff, 0x7007,
686         0x3538, 0xffffffff, 0x300010ff,
687         0x353c, 0xffffffff, 0x0,
688         0x353c, 0xffffffff, 0x0,
689         0x353c, 0xffffffff, 0x0,
690         0x353c, 0xffffffff, 0x0,
691         0x353c, 0xffffffff, 0x0,
692         0x353c, 0xffffffff, 0x400000,
693         0x3538, 0xffffffff, 0x100010ff,
694         0x353c, 0xffffffff, 0x0,
695         0x353c, 0xffffffff, 0x0,
696         0x353c, 0xffffffff, 0x0,
697         0x353c, 0xffffffff, 0x0,
698         0x353c, 0xffffffff, 0x0,
699         0x353c, 0xffffffff, 0x120200,
700         0x3538, 0xffffffff, 0x500010ff,
701         0x353c, 0xffffffff, 0x0,
702         0x353c, 0xffffffff, 0x0,
703         0x353c, 0xffffffff, 0x0,
704         0x353c, 0xffffffff, 0x0,
705         0x353c, 0xffffffff, 0x0,
706         0x353c, 0xffffffff, 0x1e1e16,
707         0x3538, 0xffffffff, 0x600010ff,
708         0x353c, 0xffffffff, 0x0,
709         0x353c, 0xffffffff, 0x0,
710         0x353c, 0xffffffff, 0x0,
711         0x353c, 0xffffffff, 0x0,
712         0x353c, 0xffffffff, 0x0,
713         0x353c, 0xffffffff, 0x171f1e,
714         0x3538, 0xffffffff, 0x700010ff,
715         0x353c, 0xffffffff, 0x0,
716         0x353c, 0xffffffff, 0x0,
717         0x353c, 0xffffffff, 0x0,
718         0x353c, 0xffffffff, 0x0,
719         0x353c, 0xffffffff, 0x0,
720         0x353c, 0xffffffff, 0x0,
721         0x3538, 0xffffffff, 0x9ff,
722         0x3500, 0xffffffff, 0x0,
723         0x3504, 0xffffffff, 0x10000800,
724         0x3504, 0xffffffff, 0xf,
725         0x3504, 0xffffffff, 0xf,
726         0x3500, 0xffffffff, 0x4,
727         0x3504, 0xffffffff, 0x1000051e,
728         0x3504, 0xffffffff, 0xffff,
729         0x3504, 0xffffffff, 0xffff,
730         0x3500, 0xffffffff, 0x8,
731         0x3504, 0xffffffff, 0x80500,
732         0x3500, 0xffffffff, 0x12,
733         0x3504, 0xffffffff, 0x9050c,
734         0x3500, 0xffffffff, 0x1d,
735         0x3504, 0xffffffff, 0xb052c,
736         0x3500, 0xffffffff, 0x2a,
737         0x3504, 0xffffffff, 0x1053e,
738         0x3500, 0xffffffff, 0x2d,
739         0x3504, 0xffffffff, 0x10546,
740         0x3500, 0xffffffff, 0x30,
741         0x3504, 0xffffffff, 0xa054e,
742         0x3500, 0xffffffff, 0x3c,
743         0x3504, 0xffffffff, 0x1055f,
744         0x3500, 0xffffffff, 0x3f,
745         0x3504, 0xffffffff, 0x10567,
746         0x3500, 0xffffffff, 0x42,
747         0x3504, 0xffffffff, 0x1056f,
748         0x3500, 0xffffffff, 0x45,
749         0x3504, 0xffffffff, 0x10572,
750         0x3500, 0xffffffff, 0x48,
751         0x3504, 0xffffffff, 0x20575,
752         0x3500, 0xffffffff, 0x4c,
753         0x3504, 0xffffffff, 0x190801,
754         0x3500, 0xffffffff, 0x67,
755         0x3504, 0xffffffff, 0x1082a,
756         0x3500, 0xffffffff, 0x6a,
757         0x3504, 0xffffffff, 0x1b082d,
758         0x3500, 0xffffffff, 0x87,
759         0x3504, 0xffffffff, 0x310851,
760         0x3500, 0xffffffff, 0xba,
761         0x3504, 0xffffffff, 0x891,
762         0x3500, 0xffffffff, 0xbc,
763         0x3504, 0xffffffff, 0x893,
764         0x3500, 0xffffffff, 0xbe,
765         0x3504, 0xffffffff, 0x20895,
766         0x3500, 0xffffffff, 0xc2,
767         0x3504, 0xffffffff, 0x20899,
768         0x3500, 0xffffffff, 0xc6,
769         0x3504, 0xffffffff, 0x2089d,
770         0x3500, 0xffffffff, 0xca,
771         0x3504, 0xffffffff, 0x8a1,
772         0x3500, 0xffffffff, 0xcc,
773         0x3504, 0xffffffff, 0x8a3,
774         0x3500, 0xffffffff, 0xce,
775         0x3504, 0xffffffff, 0x308a5,
776         0x3500, 0xffffffff, 0xd3,
777         0x3504, 0xffffffff, 0x6d08cd,
778         0x3500, 0xffffffff, 0x142,
779         0x3504, 0xffffffff, 0x2000095a,
780         0x3504, 0xffffffff, 0x1,
781         0x3500, 0xffffffff, 0x144,
782         0x3504, 0xffffffff, 0x301f095b,
783         0x3500, 0xffffffff, 0x165,
784         0x3504, 0xffffffff, 0xc094d,
785         0x3500, 0xffffffff, 0x173,
786         0x3504, 0xffffffff, 0xf096d,
787         0x3500, 0xffffffff, 0x184,
788         0x3504, 0xffffffff, 0x15097f,
789         0x3500, 0xffffffff, 0x19b,
790         0x3504, 0xffffffff, 0xc0998,
791         0x3500, 0xffffffff, 0x1a9,
792         0x3504, 0xffffffff, 0x409a7,
793         0x3500, 0xffffffff, 0x1af,
794         0x3504, 0xffffffff, 0xcdc,
795         0x3500, 0xffffffff, 0x1b1,
796         0x3504, 0xffffffff, 0x800,
797         0x3508, 0xffffffff, 0x6c9b2000,
798         0x3510, 0xfc00, 0x2000,
799         0x3544, 0xffffffff, 0xfc0,
800         0x28d4, 0x00000100, 0x100
801 };
802
803 static void si_init_golden_registers(struct radeon_device *rdev)
804 {
805         switch (rdev->family) {
806         case CHIP_TAHITI:
807                 radeon_program_register_sequence(rdev,
808                                                  tahiti_golden_registers,
809                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
810                 radeon_program_register_sequence(rdev,
811                                                  tahiti_golden_rlc_registers,
812                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
813                 radeon_program_register_sequence(rdev,
814                                                  tahiti_mgcg_cgcg_init,
815                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
816                 radeon_program_register_sequence(rdev,
817                                                  tahiti_golden_registers2,
818                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
819                 break;
820         case CHIP_PITCAIRN:
821                 radeon_program_register_sequence(rdev,
822                                                  pitcairn_golden_registers,
823                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
824                 radeon_program_register_sequence(rdev,
825                                                  pitcairn_golden_rlc_registers,
826                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
827                 radeon_program_register_sequence(rdev,
828                                                  pitcairn_mgcg_cgcg_init,
829                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
830                 break;
831         case CHIP_VERDE:
832                 radeon_program_register_sequence(rdev,
833                                                  verde_golden_registers,
834                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
835                 radeon_program_register_sequence(rdev,
836                                                  verde_golden_rlc_registers,
837                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
838                 radeon_program_register_sequence(rdev,
839                                                  verde_mgcg_cgcg_init,
840                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
841                 radeon_program_register_sequence(rdev,
842                                                  verde_pg_init,
843                                                  (const u32)ARRAY_SIZE(verde_pg_init));
844                 break;
845         case CHIP_OLAND:
846                 radeon_program_register_sequence(rdev,
847                                                  oland_golden_registers,
848                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
849                 radeon_program_register_sequence(rdev,
850                                                  oland_golden_rlc_registers,
851                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
852                 radeon_program_register_sequence(rdev,
853                                                  oland_mgcg_cgcg_init,
854                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
855                 break;
856         default:
857                 break;
858         }
859 }
860
861 #define PCIE_BUS_CLK                10000
862 #define TCLK                        (PCIE_BUS_CLK / 10)
863
864 /**
865  * si_get_xclk - get the xclk
866  *
867  * @rdev: radeon_device pointer
868  *
869  * Returns the reference clock used by the gfx engine
870  * (SI).
871  */
872 u32 si_get_xclk(struct radeon_device *rdev)
873 {
874         u32 reference_clock = rdev->clock.spll.reference_freq;
875         u32 tmp;
876
877         tmp = RREG32(CG_CLKPIN_CNTL_2);
878         if (tmp & MUX_TCLK_TO_XCLK)
879                 return TCLK;
880
881         tmp = RREG32(CG_CLKPIN_CNTL);
882         if (tmp & XTALIN_DIVIDE)
883                 return reference_clock / 4;
884
885         return reference_clock;
886 }
887
888 /* get temperature in millidegrees */
889 int si_get_temp(struct radeon_device *rdev)
890 {
891         u32 temp;
892         int actual_temp = 0;
893
894         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
895                 CTF_TEMP_SHIFT;
896
897         if (temp & 0x200)
898                 actual_temp = 255;
899         else
900                 actual_temp = temp & 0x1ff;
901
902         actual_temp = (actual_temp * 1000);
903
904         return actual_temp;
905 }
906
907 #define TAHITI_IO_MC_REGS_SIZE 36
908
/* Tahiti MC sequencer io-debug {index, value} pairs written via
 * MC_SEQ_IO_DEBUG_INDEX/DATA before the MC ucode is loaded
 * (see si_mc_load_microcode()). */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
947
/* Pitcairn MC sequencer io-debug {index, value} pairs; identical to the
 * Tahiti table except for the final 0x9f entry. */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
986
/* Verde MC sequencer io-debug {index, value} pairs; identical to the
 * Tahiti table except for the final 0x9f entry. */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1025
/* Oland MC sequencer io-debug {index, value} pairs; identical to the
 * Tahiti table except for the final 0x9f entry. */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1064
1065 /* ucode loading */
1066 static int si_mc_load_microcode(struct radeon_device *rdev)
1067 {
1068         const __be32 *fw_data;
1069         u32 running, blackout = 0;
1070         u32 *io_mc_regs;
1071         int i, ucode_size, regs_size;
1072
1073         if (!rdev->mc_fw)
1074                 return -EINVAL;
1075
1076         switch (rdev->family) {
1077         case CHIP_TAHITI:
1078                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1079                 ucode_size = SI_MC_UCODE_SIZE;
1080                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1081                 break;
1082         case CHIP_PITCAIRN:
1083                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1084                 ucode_size = SI_MC_UCODE_SIZE;
1085                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1086                 break;
1087         case CHIP_VERDE:
1088         default:
1089                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1090                 ucode_size = SI_MC_UCODE_SIZE;
1091                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1092                 break;
1093         case CHIP_OLAND:
1094                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1095                 ucode_size = OLAND_MC_UCODE_SIZE;
1096                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1097                 break;
1098         }
1099
1100         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1101
1102         if (running == 0) {
1103                 if (running) {
1104                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1105                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1106                 }
1107
1108                 /* reset the engine and set to writable */
1109                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1110                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1111
1112                 /* load mc io regs */
1113                 for (i = 0; i < regs_size; i++) {
1114                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1115                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1116                 }
1117                 /* load the MC ucode */
1118                 fw_data = (const __be32 *)rdev->mc_fw->data;
1119                 for (i = 0; i < ucode_size; i++)
1120                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1121
1122                 /* put the engine back into the active state */
1123                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1124                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1125                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1126
1127                 /* wait for training to complete */
1128                 for (i = 0; i < rdev->usec_timeout; i++) {
1129                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1130                                 break;
1131                         udelay(1);
1132                 }
1133                 for (i = 0; i < rdev->usec_timeout; i++) {
1134                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1135                                 break;
1136                         udelay(1);
1137                 }
1138
1139                 if (running)
1140                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1141         }
1142
1143         return 0;
1144 }
1145
1146 static int si_init_microcode(struct radeon_device *rdev)
1147 {
1148         struct platform_device *pdev;
1149         const char *chip_name;
1150         const char *rlc_chip_name;
1151         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1152         char fw_name[30];
1153         int err;
1154
1155         DRM_DEBUG("\n");
1156
1157         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1158         err = IS_ERR(pdev);
1159         if (err) {
1160                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1161                 return -EINVAL;
1162         }
1163
1164         switch (rdev->family) {
1165         case CHIP_TAHITI:
1166                 chip_name = "TAHITI";
1167                 rlc_chip_name = "TAHITI";
1168                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1169                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1170                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1171                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1172                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1173                 break;
1174         case CHIP_PITCAIRN:
1175                 chip_name = "PITCAIRN";
1176                 rlc_chip_name = "PITCAIRN";
1177                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1178                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1179                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1180                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1181                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1182                 break;
1183         case CHIP_VERDE:
1184                 chip_name = "VERDE";
1185                 rlc_chip_name = "VERDE";
1186                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1187                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1188                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1189                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1190                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1191                 break;
1192         case CHIP_OLAND:
1193                 chip_name = "OLAND";
1194                 rlc_chip_name = "OLAND";
1195                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1196                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1197                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1198                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1199                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1200                 break;
1201         default: BUG();
1202         }
1203
1204         DRM_INFO("Loading %s Microcode\n", chip_name);
1205
1206         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1207         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1208         if (err)
1209                 goto out;
1210         if (rdev->pfp_fw->size != pfp_req_size) {
1211                 printk(KERN_ERR
1212                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1213                        rdev->pfp_fw->size, fw_name);
1214                 err = -EINVAL;
1215                 goto out;
1216         }
1217
1218         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1219         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1220         if (err)
1221                 goto out;
1222         if (rdev->me_fw->size != me_req_size) {
1223                 printk(KERN_ERR
1224                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1225                        rdev->me_fw->size, fw_name);
1226                 err = -EINVAL;
1227         }
1228
1229         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1230         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1231         if (err)
1232                 goto out;
1233         if (rdev->ce_fw->size != ce_req_size) {
1234                 printk(KERN_ERR
1235                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1236                        rdev->ce_fw->size, fw_name);
1237                 err = -EINVAL;
1238         }
1239
1240         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1241         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1242         if (err)
1243                 goto out;
1244         if (rdev->rlc_fw->size != rlc_req_size) {
1245                 printk(KERN_ERR
1246                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1247                        rdev->rlc_fw->size, fw_name);
1248                 err = -EINVAL;
1249         }
1250
1251         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1252         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1253         if (err)
1254                 goto out;
1255         if (rdev->mc_fw->size != mc_req_size) {
1256                 printk(KERN_ERR
1257                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1258                        rdev->mc_fw->size, fw_name);
1259                 err = -EINVAL;
1260         }
1261
1262 out:
1263         platform_device_unregister(pdev);
1264
1265         if (err) {
1266                 if (err != -EINVAL)
1267                         printk(KERN_ERR
1268                                "si_cp: Failed to load firmware \"%s\"\n",
1269                                fw_name);
1270                 release_firmware(rdev->pfp_fw);
1271                 rdev->pfp_fw = NULL;
1272                 release_firmware(rdev->me_fw);
1273                 rdev->me_fw = NULL;
1274                 release_firmware(rdev->ce_fw);
1275                 rdev->ce_fw = NULL;
1276                 release_firmware(rdev->rlc_fw);
1277                 rdev->rlc_fw = NULL;
1278                 release_firmware(rdev->mc_fw);
1279                 rdev->mc_fw = NULL;
1280         }
1281         return err;
1282 }
1283
1284 /* watermark setup */
1285 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1286                                    struct radeon_crtc *radeon_crtc,
1287                                    struct drm_display_mode *mode,
1288                                    struct drm_display_mode *other_mode)
1289 {
1290         u32 tmp;
1291         /*
1292          * Line Buffer Setup
1293          * There are 3 line buffers, each one shared by 2 display controllers.
1294          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1295          * the display controllers.  The paritioning is done via one of four
1296          * preset allocations specified in bits 21:20:
1297          *  0 - half lb
1298          *  2 - whole lb, other crtc must be disabled
1299          */
1300         /* this can get tricky if we have two large displays on a paired group
1301          * of crtcs.  Ideally for multiple large displays we'd assign them to
1302          * non-linked crtcs for maximum line buffer allocation.
1303          */
1304         if (radeon_crtc->base.enabled && mode) {
1305                 if (other_mode)
1306                         tmp = 0; /* 1/2 */
1307                 else
1308                         tmp = 2; /* whole */
1309         } else
1310                 tmp = 0;
1311
1312         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1313                DC_LB_MEMORY_CONFIG(tmp));
1314
1315         if (radeon_crtc->base.enabled && mode) {
1316                 switch (tmp) {
1317                 case 0:
1318                 default:
1319                         return 4096 * 2;
1320                 case 2:
1321                         return 8192 * 2;
1322                 }
1323         }
1324
1325         /* controller not enabled, so no lb used */
1326         return 0;
1327 }
1328
1329 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1330 {
1331         u32 tmp = RREG32(MC_SHARED_CHMAP);
1332
1333         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1334         case 0:
1335         default:
1336                 return 1;
1337         case 1:
1338                 return 2;
1339         case 2:
1340                 return 4;
1341         case 3:
1342                 return 8;
1343         case 4:
1344                 return 3;
1345         case 5:
1346                 return 6;
1347         case 6:
1348                 return 10;
1349         case 7:
1350                 return 12;
1351         case 8:
1352                 return 16;
1353         }
1354 }
1355
/* per-head input parameters for the DCE6 watermark calculations below */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1371
1372 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1373 {
1374         /* Calculate raw DRAM Bandwidth */
1375         fixed20_12 dram_efficiency; /* 0.7 */
1376         fixed20_12 yclk, dram_channels, bandwidth;
1377         fixed20_12 a;
1378
1379         a.full = dfixed_const(1000);
1380         yclk.full = dfixed_const(wm->yclk);
1381         yclk.full = dfixed_div(yclk, a);
1382         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1383         a.full = dfixed_const(10);
1384         dram_efficiency.full = dfixed_const(7);
1385         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1386         bandwidth.full = dfixed_mul(dram_channels, yclk);
1387         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1388
1389         return dfixed_trunc(bandwidth);
1390 }
1391
1392 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1393 {
1394         /* Calculate DRAM Bandwidth and the part allocated to display. */
1395         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1396         fixed20_12 yclk, dram_channels, bandwidth;
1397         fixed20_12 a;
1398
1399         a.full = dfixed_const(1000);
1400         yclk.full = dfixed_const(wm->yclk);
1401         yclk.full = dfixed_div(yclk, a);
1402         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1403         a.full = dfixed_const(10);
1404         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1405         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1406         bandwidth.full = dfixed_mul(dram_channels, yclk);
1407         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1408
1409         return dfixed_trunc(bandwidth);
1410 }
1411
1412 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1413 {
1414         /* Calculate the display Data return Bandwidth */
1415         fixed20_12 return_efficiency; /* 0.8 */
1416         fixed20_12 sclk, bandwidth;
1417         fixed20_12 a;
1418
1419         a.full = dfixed_const(1000);
1420         sclk.full = dfixed_const(wm->sclk);
1421         sclk.full = dfixed_div(sclk, a);
1422         a.full = dfixed_const(10);
1423         return_efficiency.full = dfixed_const(8);
1424         return_efficiency.full = dfixed_div(return_efficiency, a);
1425         a.full = dfixed_const(32);
1426         bandwidth.full = dfixed_mul(a, sclk);
1427         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1428
1429         return dfixed_trunc(bandwidth);
1430 }
1431
/* DMIF request size used by the watermark math; fixed at 32 bytes here */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1436
static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, sclk, bandwidth;
	fixed20_12 a, b1, b2;
	u32 min_bandwidth;

	/* request rate in the display clock domain:
	 * (bytes_per_request / 2) per disp_clk cycle */
	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
	b1.full = dfixed_mul(a, disp_clk);

	/* request rate in the engine clock domain:
	 * bytes_per_request per sclk cycle */
	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
	b2.full = dfixed_mul(a, sclk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);	/* 8/10 = 0.8 */
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	/* the slower clock domain limits the achievable request bandwidth */
	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));

	a.full = dfixed_const(min_bandwidth);
	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}
1468
1469 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1470 {
1471         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1472         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1473         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1474         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1475
1476         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1477 }
1478
1479 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1480 {
1481         /* Calculate the display mode Average Bandwidth
1482          * DisplayMode should contain the source and destination dimensions,
1483          * timing, etc.
1484          */
1485         fixed20_12 bpp;
1486         fixed20_12 line_time;
1487         fixed20_12 src_width;
1488         fixed20_12 bandwidth;
1489         fixed20_12 a;
1490
1491         a.full = dfixed_const(1000);
1492         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1493         line_time.full = dfixed_div(line_time, a);
1494         bpp.full = dfixed_const(wm->bytes_per_pixel);
1495         src_width.full = dfixed_const(wm->src_width);
1496         bandwidth.full = dfixed_mul(src_width, bpp);
1497         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1498         bandwidth.full = dfixed_div(bandwidth, line_time);
1499
1500         return dfixed_trunc(bandwidth);
1501 }
1502
/*
 * Latency watermark for one display head: memory latency plus the time
 * for other heads' outstanding requests to return, extended if the line
 * buffer cannot be refilled within one active line time.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* worst-case return time for a 512-byte x8 chunk, in ns */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* return time for a 128-byte x4 cursor line pair, in ns */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many-tap filtering or interlaced scaling needs
	 * more source lines per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* dmif_size / ((mc_latency + 512) / disp_clk): rate the dmif buffer
	 * can sustain over the latency window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* further cap by disp_clk * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to refill one destination line of the line buffer */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency by any refill shortfall beyond the active time */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1565
1566 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1567 {
1568         if (dce6_average_bandwidth(wm) <=
1569             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1570                 return true;
1571         else
1572                 return false;
1573 };
1574
1575 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1576 {
1577         if (dce6_average_bandwidth(wm) <=
1578             (dce6_available_bandwidth(wm) / wm->num_heads))
1579                 return true;
1580         else
1581                 return false;
1582 };
1583
1584 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1585 {
1586         u32 lb_partitions = wm->lb_size / wm->src_width;
1587         u32 line_time = wm->active_time + wm->blank_time;
1588         u32 latency_tolerant_lines;
1589         u32 latency_hiding;
1590         fixed20_12 a;
1591
1592         a.full = dfixed_const(1);
1593         if (wm->vsc.full > a.full)
1594                 latency_tolerant_lines = 1;
1595         else {
1596                 if (lb_partitions <= (wm->vtaps + 1))
1597                         latency_tolerant_lines = 1;
1598                 else
1599                         latency_tolerant_lines = 2;
1600         }
1601
1602         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1603
1604         if (dce6_latency_watermark(wm) <= latency_hiding)
1605                 return true;
1606         else
1607                 return false;
1608 }
1609
/* Compute and program the DCE6 latency watermarks and display priority
 * marks for one crtc.  Watermark A is programmed for high clocks and
 * watermark B for low clocks (currently computed from the same wm
 * parameters - see the XXX-style note below), then the original
 * watermark selection is restored.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	/* NOTE(review): mode is the address of an embedded struct and is
	 * therefore never NULL; the check effectively gates on
	 * base.enabled and num_heads */
	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, capped at the 16-bit register maximum */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* fill in the watermark parameters from the current
		 * clocks and mode */
		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		/* ARUBA uses the evergreen DRAM channel query, the rest of
		 * the SI family uses its own */
		if (rdev->family == CHIP_ARUBA)
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A = wm_a * clock(MHz) * hsc / 1000 / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B, same formula with watermark B */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
1719
1720 void dce6_bandwidth_update(struct radeon_device *rdev)
1721 {
1722         struct drm_display_mode *mode0 = NULL;
1723         struct drm_display_mode *mode1 = NULL;
1724         u32 num_heads = 0, lb_size;
1725         int i;
1726
1727         radeon_update_display_priority(rdev);
1728
1729         for (i = 0; i < rdev->num_crtc; i++) {
1730                 if (rdev->mode_info.crtcs[i]->base.enabled)
1731                         num_heads++;
1732         }
1733         for (i = 0; i < rdev->num_crtc; i += 2) {
1734                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1735                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1736                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1737                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1738                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1739                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1740         }
1741 }
1742
1743 /*
1744  * Core functions
1745  */
1746 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1747 {
1748         const u32 num_tile_mode_states = 32;
1749         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1750
1751         switch (rdev->config.si.mem_row_size_in_kb) {
1752         case 1:
1753                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1754                 break;
1755         case 2:
1756         default:
1757                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1758                 break;
1759         case 4:
1760                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1761                 break;
1762         }
1763
1764         if ((rdev->family == CHIP_TAHITI) ||
1765             (rdev->family == CHIP_PITCAIRN)) {
1766                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1767                         switch (reg_offset) {
1768                         case 0:  /* non-AA compressed depth or any compressed stencil */
1769                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1771                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1772                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1773                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1774                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1775                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1776                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1777                                 break;
1778                         case 1:  /* 2xAA/4xAA compressed depth only */
1779                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1780                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1781                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1782                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1783                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1784                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1785                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1786                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1787                                 break;
1788                         case 2:  /* 8xAA compressed depth only */
1789                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1790                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1791                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1792                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1793                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1794                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1797                                 break;
1798                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1799                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1800                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1801                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1802                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1803                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1804                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1805                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1806                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1807                                 break;
1808                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1809                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1810                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1811                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1812                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1813                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1814                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1815                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1816                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1817                                 break;
1818                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1819                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1820                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1821                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1822                                                  TILE_SPLIT(split_equal_to_row_size) |
1823                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1824                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1825                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1826                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1827                                 break;
1828                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1829                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1830                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1831                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1832                                                  TILE_SPLIT(split_equal_to_row_size) |
1833                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1834                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1837                                 break;
1838                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1839                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1840                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1841                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1842                                                  TILE_SPLIT(split_equal_to_row_size) |
1843                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1844                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1845                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1846                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1847                                 break;
1848                         case 8:  /* 1D and 1D Array Surfaces */
1849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1850                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1851                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1852                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1854                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1855                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1856                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1857                                 break;
1858                         case 9:  /* Displayable maps. */
1859                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1860                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1861                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1862                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1863                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1864                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1865                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1866                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1867                                 break;
1868                         case 10:  /* Display 8bpp. */
1869                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1870                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1871                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1872                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1873                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1874                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1875                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1876                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1877                                 break;
1878                         case 11:  /* Display 16bpp. */
1879                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1880                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1881                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1882                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1883                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1884                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1885                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1886                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1887                                 break;
1888                         case 12:  /* Display 32bpp. */
1889                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1890                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1892                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1893                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1894                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1896                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1897                                 break;
1898                         case 13:  /* Thin. */
1899                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1900                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1901                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1902                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1903                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1904                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1905                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1906                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1907                                 break;
1908                         case 14:  /* Thin 8 bpp. */
1909                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1910                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1911                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1912                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1913                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1914                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1917                                 break;
1918                         case 15:  /* Thin 16 bpp. */
1919                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1920                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1921                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1922                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1923                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1924                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1926                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1927                                 break;
1928                         case 16:  /* Thin 32 bpp. */
1929                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1930                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1931                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1932                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1933                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1934                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1935                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1936                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1937                                 break;
1938                         case 17:  /* Thin 64 bpp. */
1939                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1941                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1942                                                  TILE_SPLIT(split_equal_to_row_size) |
1943                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1944                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1947                                 break;
1948                         case 21:  /* 8 bpp PRT. */
1949                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1950                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1951                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1952                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1953                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1954                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1955                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1956                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1957                                 break;
1958                         case 22:  /* 16 bpp PRT */
1959                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1960                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1961                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1962                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1963                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1964                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1966                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1967                                 break;
1968                         case 23:  /* 32 bpp PRT */
1969                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1970                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1971                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1972                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1973                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1974                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1975                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1976                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1977                                 break;
1978                         case 24:  /* 64 bpp PRT */
1979                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1981                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1982                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1983                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1984                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1986                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1987                                 break;
1988                         case 25:  /* 128 bpp PRT */
1989                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1990                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1991                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1992                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1993                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
1994                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1995                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1996                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1997                                 break;
1998                         default:
1999                                 gb_tile_moden = 0;
2000                                 break;
2001                         }
2002                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2003                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2004                 }
2005         } else if ((rdev->family == CHIP_VERDE) ||
2006                    (rdev->family == CHIP_OLAND) ||
2007                    (rdev->family == CHIP_HAINAN)) {
2008                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2009                         switch (reg_offset) {
2010                         case 0:  /* non-AA compressed depth or any compressed stencil */
2011                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2012                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2013                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2014                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2015                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2016                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2017                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2018                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2019                                 break;
2020                         case 1:  /* 2xAA/4xAA compressed depth only */
2021                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2022                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2023                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2024                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2025                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2026                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2027                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2028                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2029                                 break;
2030                         case 2:  /* 8xAA compressed depth only */
2031                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2032                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2033                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2034                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2035                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2036                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2037                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2038                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2039                                 break;
2040                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2041                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2043                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2044                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2045                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2046                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2047                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2048                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2049                                 break;
2050                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2051                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2053                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2054                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2055                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2056                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2057                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2058                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2059                                 break;
2060                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2061                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2062                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2063                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2064                                                  TILE_SPLIT(split_equal_to_row_size) |
2065                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2066                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2067                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2068                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2069                                 break;
2070                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2071                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2072                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2073                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2074                                                  TILE_SPLIT(split_equal_to_row_size) |
2075                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2076                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2077                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2078                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2079                                 break;
2080                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2081                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2082                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2083                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2084                                                  TILE_SPLIT(split_equal_to_row_size) |
2085                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2086                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2087                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2088                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2089                                 break;
2090                         case 8:  /* 1D and 1D Array Surfaces */
2091                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2092                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2093                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2094                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2095                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2096                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2097                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2098                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2099                                 break;
2100                         case 9:  /* Displayable maps. */
2101                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2102                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2103                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2104                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2105                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2106                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2107                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2108                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2109                                 break;
2110                         case 10:  /* Display 8bpp. */
2111                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2113                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2114                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2115                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2116                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2117                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2118                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2119                                 break;
2120                         case 11:  /* Display 16bpp. */
2121                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2122                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2123                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2124                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2125                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2126                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2127                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2128                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2129                                 break;
2130                         case 12:  /* Display 32bpp. */
2131                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2132                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2133                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2134                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2135                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2136                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2137                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2138                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2139                                 break;
2140                         case 13:  /* Thin. */
2141                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2142                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2143                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2144                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2145                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2146                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2147                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2148                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2149                                 break;
2150                         case 14:  /* Thin 8 bpp. */
2151                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2153                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2155                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2156                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2158                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2159                                 break;
2160                         case 15:  /* Thin 16 bpp. */
2161                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2162                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2163                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2164                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2165                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2166                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2168                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2169                                 break;
2170                         case 16:  /* Thin 32 bpp. */
2171                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2173                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2175                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2176                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2178                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2179                                 break;
2180                         case 17:  /* Thin 64 bpp. */
2181                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2183                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2184                                                  TILE_SPLIT(split_equal_to_row_size) |
2185                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2186                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2189                                 break;
2190                         case 21:  /* 8 bpp PRT. */
2191                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2192                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2193                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2194                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2195                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2196                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2197                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2198                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2199                                 break;
2200                         case 22:  /* 16 bpp PRT */
2201                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2202                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2203                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2204                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2205                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2206                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2208                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2209                                 break;
2210                         case 23:  /* 32 bpp PRT */
2211                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2213                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2214                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2215                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2216                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2217                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2218                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2219                                 break;
2220                         case 24:  /* 64 bpp PRT */
2221                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2222                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2223                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2224                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2225                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2226                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2227                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2228                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2229                                 break;
2230                         case 25:  /* 128 bpp PRT */
2231                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2233                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2234                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2235                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2236                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2239                                 break;
2240                         default:
2241                                 gb_tile_moden = 0;
2242                                 break;
2243                         }
2244                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2245                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2246                 }
2247         } else
2248                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2249 }
2250
2251 static void si_select_se_sh(struct radeon_device *rdev,
2252                             u32 se_num, u32 sh_num)
2253 {
2254         u32 data = INSTANCE_BROADCAST_WRITES;
2255
2256         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2257                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2258         else if (se_num == 0xffffffff)
2259                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2260         else if (sh_num == 0xffffffff)
2261                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2262         else
2263                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2264         WREG32(GRBM_GFX_INDEX, data);
2265 }
2266
2267 static u32 si_create_bitmask(u32 bit_width)
2268 {
2269         u32 i, mask = 0;
2270
2271         for (i = 0; i < bit_width; i++) {
2272                 mask <<= 1;
2273                 mask |= 1;
2274         }
2275         return mask;
2276 }
2277
2278 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2279 {
2280         u32 data, mask;
2281
2282         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2283         if (data & 1)
2284                 data &= INACTIVE_CUS_MASK;
2285         else
2286                 data = 0;
2287         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2288
2289         data >>= INACTIVE_CUS_SHIFT;
2290
2291         mask = si_create_bitmask(cu_per_sh);
2292
2293         return ~data & mask;
2294 }
2295
2296 static void si_setup_spi(struct radeon_device *rdev,
2297                          u32 se_num, u32 sh_per_se,
2298                          u32 cu_per_sh)
2299 {
2300         int i, j, k;
2301         u32 data, mask, active_cu;
2302
2303         for (i = 0; i < se_num; i++) {
2304                 for (j = 0; j < sh_per_se; j++) {
2305                         si_select_se_sh(rdev, i, j);
2306                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2307                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2308
2309                         mask = 1;
2310                         for (k = 0; k < 16; k++) {
2311                                 mask <<= k;
2312                                 if (active_cu & mask) {
2313                                         data &= ~mask;
2314                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2315                                         break;
2316                                 }
2317                         }
2318                 }
2319         }
2320         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2321 }
2322
2323 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2324                               u32 max_rb_num, u32 se_num,
2325                               u32 sh_per_se)
2326 {
2327         u32 data, mask;
2328
2329         data = RREG32(CC_RB_BACKEND_DISABLE);
2330         if (data & 1)
2331                 data &= BACKEND_DISABLE_MASK;
2332         else
2333                 data = 0;
2334         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2335
2336         data >>= BACKEND_DISABLE_SHIFT;
2337
2338         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2339
2340         return data & mask;
2341 }
2342
/*
 * si_setup_rb - program the render-backend raster configuration
 *
 * Gathers the per-SE/SH disabled-RB fuse bits into one global word,
 * inverts it into an enabled-RB bitmask, and programs
 * PA_SC_RASTER_CONFIG for each shader engine with an RB mapping chosen
 * from each shader array's pair of enabled-RB bits.  Leaves
 * GRBM_GFX_INDEX in full broadcast mode on exit.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pack every SE/SH's disabled-RB field into disabled_rbs,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: set a bit in enabled_rbs for every RB not disabled. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Per SE (SH broadcast), consume two enabled-RB bits per shader
	 * array to pick an RB map for that array's raster-config field.
	 * NOTE(review): the field shift uses (i * sh_per_se + j) even
	 * though data is rebuilt per SE — looks like it should be
	 * SE-local (j-based); confirm against the SI register spec. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2390
2391 static void si_gpu_init(struct radeon_device *rdev)
2392 {
2393         u32 gb_addr_config = 0;
2394         u32 mc_shared_chmap, mc_arb_ramcfg;
2395         u32 sx_debug_1;
2396         u32 hdp_host_path_cntl;
2397         u32 tmp;
2398         int i, j;
2399
2400         switch (rdev->family) {
2401         case CHIP_TAHITI:
2402                 rdev->config.si.max_shader_engines = 2;
2403                 rdev->config.si.max_tile_pipes = 12;
2404                 rdev->config.si.max_cu_per_sh = 8;
2405                 rdev->config.si.max_sh_per_se = 2;
2406                 rdev->config.si.max_backends_per_se = 4;
2407                 rdev->config.si.max_texture_channel_caches = 12;
2408                 rdev->config.si.max_gprs = 256;
2409                 rdev->config.si.max_gs_threads = 32;
2410                 rdev->config.si.max_hw_contexts = 8;
2411
2412                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2413                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2414                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2415                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2416                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2417                 break;
2418         case CHIP_PITCAIRN:
2419                 rdev->config.si.max_shader_engines = 2;
2420                 rdev->config.si.max_tile_pipes = 8;
2421                 rdev->config.si.max_cu_per_sh = 5;
2422                 rdev->config.si.max_sh_per_se = 2;
2423                 rdev->config.si.max_backends_per_se = 4;
2424                 rdev->config.si.max_texture_channel_caches = 8;
2425                 rdev->config.si.max_gprs = 256;
2426                 rdev->config.si.max_gs_threads = 32;
2427                 rdev->config.si.max_hw_contexts = 8;
2428
2429                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2430                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2431                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2432                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2433                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2434                 break;
2435         case CHIP_VERDE:
2436         default:
2437                 rdev->config.si.max_shader_engines = 1;
2438                 rdev->config.si.max_tile_pipes = 4;
2439                 rdev->config.si.max_cu_per_sh = 2;
2440                 rdev->config.si.max_sh_per_se = 2;
2441                 rdev->config.si.max_backends_per_se = 4;
2442                 rdev->config.si.max_texture_channel_caches = 4;
2443                 rdev->config.si.max_gprs = 256;
2444                 rdev->config.si.max_gs_threads = 32;
2445                 rdev->config.si.max_hw_contexts = 8;
2446
2447                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2448                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2449                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2450                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2451                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2452                 break;
2453         case CHIP_OLAND:
2454                 rdev->config.si.max_shader_engines = 1;
2455                 rdev->config.si.max_tile_pipes = 4;
2456                 rdev->config.si.max_cu_per_sh = 6;
2457                 rdev->config.si.max_sh_per_se = 1;
2458                 rdev->config.si.max_backends_per_se = 2;
2459                 rdev->config.si.max_texture_channel_caches = 4;
2460                 rdev->config.si.max_gprs = 256;
2461                 rdev->config.si.max_gs_threads = 16;
2462                 rdev->config.si.max_hw_contexts = 8;
2463
2464                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2465                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2466                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2467                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2468                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2469                 break;
2470         case CHIP_HAINAN:
2471                 rdev->config.si.max_shader_engines = 1;
2472                 rdev->config.si.max_tile_pipes = 4;
2473                 rdev->config.si.max_cu_per_sh = 5;
2474                 rdev->config.si.max_sh_per_se = 1;
2475                 rdev->config.si.max_backends_per_se = 1;
2476                 rdev->config.si.max_texture_channel_caches = 2;
2477                 rdev->config.si.max_gprs = 256;
2478                 rdev->config.si.max_gs_threads = 16;
2479                 rdev->config.si.max_hw_contexts = 8;
2480
2481                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2482                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2483                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2484                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2485                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2486                 break;
2487         }
2488
2489         /* Initialize HDP */
2490         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2491                 WREG32((0x2c14 + j), 0x00000000);
2492                 WREG32((0x2c18 + j), 0x00000000);
2493                 WREG32((0x2c1c + j), 0x00000000);
2494                 WREG32((0x2c20 + j), 0x00000000);
2495                 WREG32((0x2c24 + j), 0x00000000);
2496         }
2497
2498         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2499
2500         evergreen_fix_pci_max_read_req_size(rdev);
2501
2502         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2503
2504         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2505         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2506
2507         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2508         rdev->config.si.mem_max_burst_length_bytes = 256;
2509         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2510         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2511         if (rdev->config.si.mem_row_size_in_kb > 4)
2512                 rdev->config.si.mem_row_size_in_kb = 4;
2513         /* XXX use MC settings? */
2514         rdev->config.si.shader_engine_tile_size = 32;
2515         rdev->config.si.num_gpus = 1;
2516         rdev->config.si.multi_gpu_tile_size = 64;
2517
2518         /* fix up row size */
2519         gb_addr_config &= ~ROW_SIZE_MASK;
2520         switch (rdev->config.si.mem_row_size_in_kb) {
2521         case 1:
2522         default:
2523                 gb_addr_config |= ROW_SIZE(0);
2524                 break;
2525         case 2:
2526                 gb_addr_config |= ROW_SIZE(1);
2527                 break;
2528         case 4:
2529                 gb_addr_config |= ROW_SIZE(2);
2530                 break;
2531         }
2532
2533         /* setup tiling info dword.  gb_addr_config is not adequate since it does
2534          * not have bank info, so create a custom tiling dword.
2535          * bits 3:0   num_pipes
2536          * bits 7:4   num_banks
2537          * bits 11:8  group_size
2538          * bits 15:12 row_size
2539          */
2540         rdev->config.si.tile_config = 0;
2541         switch (rdev->config.si.num_tile_pipes) {
2542         case 1:
2543                 rdev->config.si.tile_config |= (0 << 0);
2544                 break;
2545         case 2:
2546                 rdev->config.si.tile_config |= (1 << 0);
2547                 break;
2548         case 4:
2549                 rdev->config.si.tile_config |= (2 << 0);
2550                 break;
2551         case 8:
2552         default:
2553                 /* XXX what about 12? */
2554                 rdev->config.si.tile_config |= (3 << 0);
2555                 break;
2556         }       
2557         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
2558         case 0: /* four banks */
2559                 rdev->config.si.tile_config |= 0 << 4;
2560                 break;
2561         case 1: /* eight banks */
2562                 rdev->config.si.tile_config |= 1 << 4;
2563                 break;
2564         case 2: /* sixteen banks */
2565         default:
2566                 rdev->config.si.tile_config |= 2 << 4;
2567                 break;
2568         }
2569         rdev->config.si.tile_config |=
2570                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2571         rdev->config.si.tile_config |=
2572                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2573
2574         WREG32(GB_ADDR_CONFIG, gb_addr_config);
2575         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
2576         WREG32(DMIF_ADDR_CALC, gb_addr_config);
2577         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2578         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
2579         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
2580         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2581         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2582         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2583
2584         si_tiling_mode_table_init(rdev);
2585
2586         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
2587                     rdev->config.si.max_sh_per_se,
2588                     rdev->config.si.max_backends_per_se);
2589
2590         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
2591                      rdev->config.si.max_sh_per_se,
2592                      rdev->config.si.max_cu_per_sh);
2593
2594
2595         /* set HW defaults for 3D engine */
2596         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
2597                                      ROQ_IB2_START(0x2b)));
2598         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2599
2600         sx_debug_1 = RREG32(SX_DEBUG_1);
2601         WREG32(SX_DEBUG_1, sx_debug_1);
2602
2603         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2604
2605         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
2606                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
2607                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
2608                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
2609
2610         WREG32(VGT_NUM_INSTANCES, 1);
2611
2612         WREG32(CP_PERFMON_CNTL, 0);
2613
2614         WREG32(SQ_CONFIG, 0);
2615
2616         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2617                                           FORCE_EOV_MAX_REZ_CNT(255)));
2618
2619         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2620                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2621
2622         WREG32(VGT_GS_VERTEX_REUSE, 16);
2623         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2624
2625         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
2626         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
2627         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
2628         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
2629         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
2630         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
2631         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
2632         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
2633
2634         tmp = RREG32(HDP_MISC_CNTL);
2635         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2636         WREG32(HDP_MISC_CNTL, tmp);
2637
2638         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2639         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2640
2641         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2642
2643         udelay(50);
2644 }
2645
2646 /*
2647  * GPU scratch registers helpers function.
2648  */
2649 static void si_scratch_init(struct radeon_device *rdev)
2650 {
2651         int i;
2652
2653         rdev->scratch.num_reg = 7;
2654         rdev->scratch.reg_base = SCRATCH_REG0;
2655         for (i = 0; i < rdev->scratch.num_reg; i++) {
2656                 rdev->scratch.free[i] = true;
2657                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2658         }
2659 }
2660
/**
 * si_fence_ring_emit - emit a fence on a CP ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to signal
 *
 * Emits a cache flush over GART followed by an EVENT_WRITE_EOP packet
 * that writes the fence sequence number to the fence driver's GPU
 * address and requests an interrupt once the pipeline has drained.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 <- 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	/* NOTE(review): next two dwords are presumably coher size/base of
	 * the SURFACE_SYNC packet — confirm against the PM4 packet spec */
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff); /* fence address, low 32 bits */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq); /* value written at addr */
	radeon_ring_write(ring, 0);
}
2687
2688 /*
2689  * IB stuff
2690  */
/**
 * si_ring_ib_execute - emit an indirect buffer on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet pointing at @ib, optionally recording the post-IB read pointer
 * first, and flushes read caches over GART for the IB's VM id afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_CONFIG_REG write, plus the
			 * 4-dword IB packet and 8-dword flush emitted below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA packet, plus the
			 * 4-dword IB packet and 8-dword flush emitted below */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* NOTE(review): presumably WRITE_DATA control bits
			 * (dst sel / confirm) — verify against PM4 spec */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* length plus the VM id in the top byte (0 = no VM) */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
2747
2748 /*
2749  * CP.
2750  */
2751 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2752 {
2753         if (enable)
2754                 WREG32(CP_ME_CNTL, 0);
2755         else {
2756                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2757                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2758                 WREG32(SCRATCH_UMSK, 0);
2759                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2760                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2761                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2762         }
2763         udelay(50);
2764 }
2765
2766 static int si_cp_load_microcode(struct radeon_device *rdev)
2767 {
2768         const __be32 *fw_data;
2769         int i;
2770
2771         if (!rdev->me_fw || !rdev->pfp_fw)
2772                 return -EINVAL;
2773
2774         si_cp_enable(rdev, false);
2775
2776         /* PFP */
2777         fw_data = (const __be32 *)rdev->pfp_fw->data;
2778         WREG32(CP_PFP_UCODE_ADDR, 0);
2779         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
2780                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2781         WREG32(CP_PFP_UCODE_ADDR, 0);
2782
2783         /* CE */
2784         fw_data = (const __be32 *)rdev->ce_fw->data;
2785         WREG32(CP_CE_UCODE_ADDR, 0);
2786         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
2787                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2788         WREG32(CP_CE_UCODE_ADDR, 0);
2789
2790         /* ME */
2791         fw_data = (const __be32 *)rdev->me_fw->data;
2792         WREG32(CP_ME_RAM_WADDR, 0);
2793         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
2794                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2795         WREG32(CP_ME_RAM_WADDR, 0);
2796
2797         WREG32(CP_PFP_UCODE_ADDR, 0);
2798         WREG32(CP_CE_UCODE_ADDR, 0);
2799         WREG32(CP_ME_RAM_WADDR, 0);
2800         WREG32(CP_ME_RAM_RADDR, 0);
2801         return 0;
2802 }
2803
2804 static int si_cp_start(struct radeon_device *rdev)
2805 {
2806         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2807         int r, i;
2808
2809         r = radeon_ring_lock(rdev, ring, 7 + 4);
2810         if (r) {
2811                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2812                 return r;
2813         }
2814         /* init the CP */
2815         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
2816         radeon_ring_write(ring, 0x1);
2817         radeon_ring_write(ring, 0x0);
2818         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
2819         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
2820         radeon_ring_write(ring, 0);
2821         radeon_ring_write(ring, 0);
2822
2823         /* init the CE partitions */
2824         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2825         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2826         radeon_ring_write(ring, 0xc000);
2827         radeon_ring_write(ring, 0xe000);
2828         radeon_ring_unlock_commit(rdev, ring);
2829
2830         si_cp_enable(rdev, true);
2831
2832         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
2833         if (r) {
2834                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2835                 return r;
2836         }
2837
2838         /* setup clear context state */
2839         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2840         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2841
2842         for (i = 0; i < si_default_size; i++)
2843                 radeon_ring_write(ring, si_default_state[i]);
2844
2845         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2846         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2847
2848         /* set clear context state */
2849         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2850         radeon_ring_write(ring, 0);
2851
2852         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2853         radeon_ring_write(ring, 0x00000316);
2854         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2855         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2856
2857         radeon_ring_unlock_commit(rdev, ring);
2858
2859         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
2860                 ring = &rdev->ring[i];
2861                 r = radeon_ring_lock(rdev, ring, 2);
2862
2863                 /* clear the compute context state */
2864                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
2865                 radeon_ring_write(ring, 0);
2866
2867                 radeon_ring_unlock_commit(rdev, ring);
2868         }
2869
2870         return 0;
2871 }
2872
2873 static void si_cp_fini(struct radeon_device *rdev)
2874 {
2875         struct radeon_ring *ring;
2876         si_cp_enable(rdev, false);
2877
2878         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2879         radeon_ring_fini(rdev, ring);
2880         radeon_scratch_free(rdev, ring->rptr_save_reg);
2881
2882         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2883         radeon_ring_fini(rdev, ring);
2884         radeon_scratch_free(rdev, ring->rptr_save_reg);
2885
2886         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2887         radeon_ring_fini(rdev, ring);
2888         radeon_scratch_free(rdev, ring->rptr_save_reg);
2889 }
2890
/**
 * si_cp_resume - set up and start the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP, programs the ring buffer size, read/write
 * pointers, writeback addresses and base for RB0 (gfx), RB1 and RB2
 * (compute), then starts the rings via si_cp_start() and ring-tests
 * each one.  Returns 0 on success (compute ring test failures only
 * leave those rings marked not-ready), negative error code if the gfx
 * ring test fails.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp); /* final CNTL without RB_RPTR_WR_ENA */

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	/* NOTE(review): unlike RB0 above, RB1/RB2 never get RB_NO_UPDATE
	 * when writeback is disabled — confirm this asymmetry is intended */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	/* a gfx ring failure is fatal; compute ring failures are not */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3026
/**
 * si_gpu_check_soft_reset - determine which GPU blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Reads GRBM_STATUS, GRBM_STATUS2, the DMA status registers,
 * SRBM_STATUS2, SRBM_STATUS and VM_L2_STATUS and accumulates a
 * RADEON_RESET_* bitmask of blocks that report busy.  An MC-busy
 * indication is dropped at the end since a busy MC is most likely
 * not actually hung.  Returns the accumulated reset mask (0 = idle).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3107
/**
 * si_gpu_soft_reset - soft-reset the requested GPU blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of blocks to reset
 *
 * Logs the current GPU status, halts the CP and any DMA engines being
 * reset, stops the MC, translates @reset_mask into GRBM/SRBM soft-reset
 * bits, pulses those resets, and finally resumes the MC.  A no-op when
 * @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* the MC must be idle while the reset bits are toggled */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, settle, clear */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET); /* read back to post the write */

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits: set, settle, clear */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3232
3233 int si_asic_reset(struct radeon_device *rdev)
3234 {
3235         u32 reset_mask;
3236
3237         reset_mask = si_gpu_check_soft_reset(rdev);
3238
3239         if (reset_mask)
3240                 r600_set_bios_scratch_engine_hung(rdev, true);
3241
3242         si_gpu_soft_reset(rdev, reset_mask);
3243
3244         reset_mask = si_gpu_check_soft_reset(rdev);
3245
3246         if (!reset_mask)
3247                 r600_set_bios_scratch_engine_hung(rdev, false);
3248
3249         return 0;
3250 }
3251
3252 /**
3253  * si_gfx_is_lockup - Check if the GFX engine is locked up
3254  *
3255  * @rdev: radeon_device pointer
3256  * @ring: radeon_ring structure holding ring information
3257  *
3258  * Check if the GFX engine is locked up.
3259  * Returns true if the engine appears to be locked up, false if not.
3260  */
3261 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3262 {
3263         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3264
3265         if (!(reset_mask & (RADEON_RESET_GFX |
3266                             RADEON_RESET_COMPUTE |
3267                             RADEON_RESET_CP))) {
3268                 radeon_ring_lockup_update(ring);
3269                 return false;
3270         }
3271         /* force CP activities */
3272         radeon_ring_force_activity(rdev, ring);
3273         return radeon_ring_test_lockup(rdev, ring);
3274 }
3275
3276 /**
3277  * si_dma_is_lockup - Check if the DMA engine is locked up
3278  *
3279  * @rdev: radeon_device pointer
3280  * @ring: radeon_ring structure holding ring information
3281  *
3282  * Check if the async DMA engine is locked up.
3283  * Returns true if the engine appears to be locked up, false if not.
3284  */
3285 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3286 {
3287         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3288         u32 mask;
3289
3290         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3291                 mask = RADEON_RESET_DMA;
3292         else
3293                 mask = RADEON_RESET_DMA1;
3294
3295         if (!(reset_mask & mask)) {
3296                 radeon_ring_lockup_update(ring);
3297                 return false;
3298         }
3299         /* force ring activities */
3300         radeon_ring_force_activity(rdev, ring);
3301         return radeon_ring_test_lockup(rdev, ring);
3302 }
3303
3304 /* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clears a bank of HDP registers, stops the MC, programs the system
 * aperture to cover VRAM, sets the FB location and HDP non-surface
 * range, disables the AGP aperture, then resumes the MC and turns off
 * the VGA renderer so it cannot scribble over driver-owned VRAM.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 stride 0x18 look like per-surface
	 * HDP registers — confirm against the register spec */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* the MC must be idle while the apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high 16 bits, start in the low 16 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (bottom above top) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
3352
3353 static void si_vram_gtt_location(struct radeon_device *rdev,
3354                                  struct radeon_mc *mc)
3355 {
3356         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3357                 /* leave room for at least 1024M GTT */
3358                 dev_warn(rdev->dev, "limiting VRAM\n");
3359                 mc->real_vram_size = 0xFFC0000000ULL;
3360                 mc->mc_vram_size = 0xFFC0000000ULL;
3361         }
3362         radeon_vram_location(rdev, &rdev->mc, 0);
3363         rdev->mc.gtt_base_align = 0;
3364         radeon_gtt_location(rdev, mc);
3365 }
3366
3367 static int si_mc_init(struct radeon_device *rdev)
3368 {
3369         u32 tmp;
3370         int chansize, numchan;
3371
3372         /* Get VRAM informations */
3373         rdev->mc.vram_is_ddr = true;
3374         tmp = RREG32(MC_ARB_RAMCFG);
3375         if (tmp & CHANSIZE_OVERRIDE) {
3376                 chansize = 16;
3377         } else if (tmp & CHANSIZE_MASK) {
3378                 chansize = 64;
3379         } else {
3380                 chansize = 32;
3381         }
3382         tmp = RREG32(MC_SHARED_CHMAP);
3383         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3384         case 0:
3385         default:
3386                 numchan = 1;
3387                 break;
3388         case 1:
3389                 numchan = 2;
3390                 break;
3391         case 2:
3392                 numchan = 4;
3393                 break;
3394         case 3:
3395                 numchan = 8;
3396                 break;
3397         case 4:
3398                 numchan = 3;
3399                 break;
3400         case 5:
3401                 numchan = 6;
3402                 break;
3403         case 6:
3404                 numchan = 10;
3405                 break;
3406         case 7:
3407                 numchan = 12;
3408                 break;
3409         case 8:
3410                 numchan = 16;
3411                 break;
3412         }
3413         rdev->mc.vram_width = numchan * chansize;
3414         /* Could aper size report 0 ? */
3415         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3416         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3417         /* size in MB on si */
3418         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3419         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3420         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3421         si_vram_gtt_location(rdev, &rdev->mc);
3422         radeon_update_bandwidth_info(rdev);
3423
3424         return 0;
3425 }
3426
3427 /*
3428  * GART
3429  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and requests a TLB invalidate for VM
 * context 0 (the GART context).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3438
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * points VM context 0 at the GART table, assigns a valid (placeholder)
 * page table to contexts 1-15 and enables them with protection-fault
 * interrupts, then flushes the TLBs.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: covers the GTT aperture via the GART table */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, zeroed here — confirm
	 * their purpose against the SI register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	/* two-level page tables; fault on every class of bad access */
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3523
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, puts the L1 TLB and L2 cache into a
 * pass-through state and unpins the page table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: note ENABLE_L1_TLB is deliberately not set */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: ENABLE_L2_CACHE is deliberately not set */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
3542
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the page-table VRAM backing and
 * releases the GART bookkeeping; order matters.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
3549
3550 /* vm parser */
3551 static bool si_vm_reg_valid(u32 reg)
3552 {
3553         /* context regs are fine */
3554         if (reg >= 0x28000)
3555                 return true;
3556
3557         /* check config regs */
3558         switch (reg) {
3559         case GRBM_GFX_INDEX:
3560         case CP_STRMOUT_CNTL:
3561         case VGT_VTX_VECT_EJECT_REG:
3562         case VGT_CACHE_INVALIDATION:
3563         case VGT_ESGS_RING_SIZE:
3564         case VGT_GSVS_RING_SIZE:
3565         case VGT_GS_VERTEX_REUSE:
3566         case VGT_PRIMITIVE_TYPE:
3567         case VGT_INDEX_TYPE:
3568         case VGT_NUM_INDICES:
3569         case VGT_NUM_INSTANCES:
3570         case VGT_TF_RING_SIZE:
3571         case VGT_HS_OFFCHIP_PARAM:
3572         case VGT_TF_MEMORY_BASE:
3573         case PA_CL_ENHANCE:
3574         case PA_SU_LINE_STIPPLE_VALUE:
3575         case PA_SC_LINE_STIPPLE_STATE:
3576         case PA_SC_ENHANCE:
3577         case SQC_CACHES:
3578         case SPI_STATIC_THREAD_MGMT_1:
3579         case SPI_STATIC_THREAD_MGMT_2:
3580         case SPI_STATIC_THREAD_MGMT_3:
3581         case SPI_PS_MAX_WAVE_ID:
3582         case SPI_CONFIG_CNTL:
3583         case SPI_CONFIG_CNTL_1:
3584         case TA_CNTL_AUX:
3585                 return true;
3586         default:
3587                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3588                 return false;
3589         }
3590 }
3591
3592 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
3593                                   u32 *ib, struct radeon_cs_packet *pkt)
3594 {
3595         switch (pkt->opcode) {
3596         case PACKET3_NOP:
3597         case PACKET3_SET_BASE:
3598         case PACKET3_SET_CE_DE_COUNTERS:
3599         case PACKET3_LOAD_CONST_RAM:
3600         case PACKET3_WRITE_CONST_RAM:
3601         case PACKET3_WRITE_CONST_RAM_OFFSET:
3602         case PACKET3_DUMP_CONST_RAM:
3603         case PACKET3_INCREMENT_CE_COUNTER:
3604         case PACKET3_WAIT_ON_DE_COUNTER:
3605         case PACKET3_CE_WRITE:
3606                 break;
3607         default:
3608                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
3609                 return -EINVAL;
3610         }
3611         return 0;
3612 }
3613
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 from a VM GFX ring IB
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: IB dword buffer
 * @pkt: decoded packet header (idx/opcode/count)
 *
 * Harmless opcodes are whitelisted outright.  Opcodes that can write
 * registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) have every target register checked with
 * si_vm_reg_valid().  Returns 0 when allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		/* always harmless */
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field (bits 8-11) == 0 selects a register dst;
		 * the dword index of the dst is converted to a byte offset */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16: single-register write vs. consecutive range */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8: write destination is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1: dst is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* idx_value is an offset from the config-reg window base */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					/* SAIC: src addr does not increment */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					/* low 21 bits of command = byte count */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				/* NOTE(review): unlike the SRC path this is
				 * not shifted left by 2 — confirm against the
				 * PM4 CP_DMA dword encoding */
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					/* DAIC: dst addr does not increment */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3772
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a VM compute IB
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: IB dword buffer
 * @pkt: decoded packet header (idx/opcode/count)
 *
 * Compute-ring variant of si_vm_packet3_gfx_check(): a smaller opcode
 * whitelist (no draw packets, no SET_CONFIG_REG, no CP_DMA), with the
 * same register validation for COPY_DATA, WRITE_DATA, COND_WRITE and
 * COPY_DW.  Returns 0 when allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		/* always harmless */
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field (bits 8-11) == 0 selects a register dst */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16: single-register write vs. consecutive range */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8: write destination is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1: dst is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3854
3855 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3856 {
3857         int ret = 0;
3858         u32 idx = 0;
3859         struct radeon_cs_packet pkt;
3860
3861         do {
3862                 pkt.idx = idx;
3863                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3864                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3865                 pkt.one_reg_wr = 0;
3866                 switch (pkt.type) {
3867                 case RADEON_PACKET_TYPE0:
3868                         dev_err(rdev->dev, "Packet0 not allowed!\n");
3869                         ret = -EINVAL;
3870                         break;
3871                 case RADEON_PACKET_TYPE2:
3872                         idx += 1;
3873                         break;
3874                 case RADEON_PACKET_TYPE3:
3875                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3876                         if (ib->is_const_ib)
3877                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
3878                         else {
3879                                 switch (ib->ring) {
3880                                 case RADEON_RING_TYPE_GFX_INDEX:
3881                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
3882                                         break;
3883                                 case CAYMAN_RING_TYPE_CP1_INDEX:
3884                                 case CAYMAN_RING_TYPE_CP2_INDEX:
3885                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
3886                                         break;
3887                                 default:
3888                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
3889                                         ret = -EINVAL;
3890                                         break;
3891                                 }
3892                         }
3893                         idx += pkt.count + 2;
3894                         break;
3895                 default:
3896                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3897                         ret = -EINVAL;
3898                         break;
3899                 }
3900                 if (ret)
3901                         break;
3902         } while (idx < ib->length_dw);
3903
3904         return ret;
3905 }
3906
3907 /*
3908  * vm
3909  */
/**
 * si_vm_init - initialize the VM manager parameters
 *
 * @rdev: radeon_device pointer
 *
 * SI exposes 16 VM contexts; VRAM pages need no extra base offset.
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
3919
/**
 * si_vm_fini - tear down the VM manager
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Intentionally empty: si_vm_init() allocates no resources.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
3923
/**
 * si_vm_set_page - update the page tables using the CP or DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the CP (SI).  The engine is selected
 * by rdev->asic->vm.pt_ring_index: the GFX ring uses WRITE_DATA
 * packets, the DMA ring uses DMA write packets (system pages) or a
 * single PTE_PDE packet (contiguous VRAM pages).
 */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		while (count) {
			/* 2 header dwords + 2 dwords per entry, capped at the
			 * maximum WRITE_DATA payload */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system pages are reached through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					/* NOTE(review): the SYSTEM branch is always
					 * taken here — the enclosing if already
					 * checked the same flag */
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				pe += ndw * 4;
				addr += (ndw / 2) * incr;
				count -= ndw / 2;
			}
		}
		/* pad the DMA IB to a multiple of 8 dwords with NOPs */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
	}
}
4029
/**
 * si_vm_flush - flush a VM's TLB via the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush; NULL is a no-op
 *
 * Emits WRITE_DATA packets that update the VM's page directory base,
 * flush the HDP cache and request a TLB invalidate for this VM's
 * context, then syncs the PFP to the ME.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4072
/**
 * si_dma_vm_flush - flush the VM TLB via a DMA ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Same sequence as si_vm_flush() but emitted as SRBM_WRITE DMA
 * packets: update the page directory base, flush the HDP cache,
 * then request a TLB invalidate for this VM id.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new page directory base; ids 0-7 and 8-15 use separate banks */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4098
4099 /*
4100  * RLC
4101  */
/**
 * si_rlc_fini - tear down the RLC buffer objects
 *
 * @rdev: radeon_device pointer
 *
 * Unpins and frees the RLC save/restore and clear-state buffer
 * objects, if present, and clears the pointers.  Safe to call when
 * only some (or none) of the objects were created, so it doubles
 * as the error-unwind path for si_rlc_init().
 */
void si_rlc_fini(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj) {
		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
		/* best effort: unpin/unreserve even if the reserve warned */
		radeon_bo_unpin(rdev->rlc.save_restore_obj);
		radeon_bo_unreserve(rdev->rlc.save_restore_obj);

		radeon_bo_unref(&rdev->rlc.save_restore_obj);
		rdev->rlc.save_restore_obj = NULL;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj) {
		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
		radeon_bo_unpin(rdev->rlc.clear_state_obj);
		radeon_bo_unreserve(rdev->rlc.clear_state_obj);

		radeon_bo_unref(&rdev->rlc.clear_state_obj);
		rdev->rlc.clear_state_obj = NULL;
	}
}
4130
/**
 * si_rlc_init - allocate and pin the RLC buffer objects
 *
 * @rdev: radeon_device pointer
 *
 * Creates (if not already created) and pins in VRAM the one-page
 * save/restore and clear-state buffer objects used by the RLC,
 * recording their GPU addresses for si_rlc_resume().  On failure
 * everything allocated so far is released via si_rlc_fini().
 *
 * Returns 0 on success, negative error code on failure.
 */
int si_rlc_init(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.save_restore_obj);
		if (r) {
			/* nothing allocated yet, so no fini needed here */
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.save_restore_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.clear_state_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
			si_rlc_fini(rdev);
			return r;
		}
	}
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.clear_state_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	return 0;
}
4187
/* Halt the RLC by clearing its control register. */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);
}
4192
/* Re-enable the RLC after its microcode has been loaded. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);
}
4197
/**
 * si_rlc_resume - load the RLC microcode and start the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Halts the RLC, resets its list/counter registers, programs the
 * GPU addresses of the save/restore and clear-state buffers set up
 * by si_rlc_init(), uploads SI_RLC_UCODE_SIZE dwords of big-endian
 * microcode, then re-enables the RLC.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);

	/* both buffers are 256-byte aligned addresses (hence >> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* firmware image is stored big-endian; convert dword by dword */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_rlc_start(rdev);

	return 0;
}
4231
/**
 * si_enable_interrupts - enable the interrupt handler (IH)
 *
 * @rdev: radeon_device pointer
 *
 * Sets the enable bits in IH_CNTL and IH_RB_CNTL (preserving the
 * other configuration bits) and records the IH as enabled.
 */
static void si_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}
4243
/**
 * si_disable_interrupts - disable the interrupt handler (IH)
 *
 * @rdev: radeon_device pointer
 *
 * Clears the IH ring and controller enable bits, resets the ring
 * read/write pointers to 0 and mirrors that in the driver state.
 */
static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
4259
/**
 * si_disable_interrupt_state - mask all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Masks the CP ring, DMA trap, GRBM, CRTC vblank/pageflip, DAC
 * autodetect and hot-plug interrupt sources, honoring the number
 * of CRTCs the asic actually has.  For the HPD pins only the
 * polarity bit is preserved; all enable bits are cleared.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

	/* keep the user-configured polarity, drop every enable/ack bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
4310
/**
 * si_irq_init - bring up the interrupt handler (IH) ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, disables interrupts, loads the RLC
 * microcode, programs the IH ring location, size and writeback
 * address, masks all sources via si_disable_interrupt_state(),
 * enables bus-mastering and finally enables the IH.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4381
/**
 * si_irq_set - program the hardware interrupt enables from driver state
 *
 * @rdev: radeon_device pointer
 *
 * Translates the software interrupt state in rdev->irq (per-ring
 * fence interrupts, CRTC vblank/pageflip requests, hot-plug pins)
 * into the corresponding CP, DMA, GRBM, CRTC INT_MASK and HPD
 * interrupt-enable registers.  If the IH itself is disabled, all
 * sources are masked instead.
 *
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current register values with the enable bits cleared */
	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* commit everything to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	return 0;
}
4531
4532 static inline void si_irq_ack(struct radeon_device *rdev)
4533 {
4534         u32 tmp;
4535
4536         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4537         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4538         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4539         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4540         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4541         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4542         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4543         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4544         if (rdev->num_crtc >= 4) {
4545                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4546                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4547         }
4548         if (rdev->num_crtc >= 6) {
4549                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4550                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4551         }
4552
4553         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4554                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4555         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4556                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4557         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4558                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4559         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4560                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4561         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4562                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4563         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4564                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4565
4566         if (rdev->num_crtc >= 4) {
4567                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4568                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4569                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4570                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4571                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4572                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4573                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4574                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4575                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4576                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4577                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4578                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4579         }
4580
4581         if (rdev->num_crtc >= 6) {
4582                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4583                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4584                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4585                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4586                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4587                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4588                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4589                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4590                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4591                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4592                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4593                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4594         }
4595
4596         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4597                 tmp = RREG32(DC_HPD1_INT_CONTROL);
4598                 tmp |= DC_HPDx_INT_ACK;
4599                 WREG32(DC_HPD1_INT_CONTROL, tmp);
4600         }
4601         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4602                 tmp = RREG32(DC_HPD2_INT_CONTROL);
4603                 tmp |= DC_HPDx_INT_ACK;
4604                 WREG32(DC_HPD2_INT_CONTROL, tmp);
4605         }
4606         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4607                 tmp = RREG32(DC_HPD3_INT_CONTROL);
4608                 tmp |= DC_HPDx_INT_ACK;
4609                 WREG32(DC_HPD3_INT_CONTROL, tmp);
4610         }
4611         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4612                 tmp = RREG32(DC_HPD4_INT_CONTROL);
4613                 tmp |= DC_HPDx_INT_ACK;
4614                 WREG32(DC_HPD4_INT_CONTROL, tmp);
4615         }
4616         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4617                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4618                 tmp |= DC_HPDx_INT_ACK;
4619                 WREG32(DC_HPD5_INT_CONTROL, tmp);
4620         }
4621         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4622                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4623                 tmp |= DC_HPDx_INT_ACK;
4624                 WREG32(DC_HPD6_INT_CONTROL, tmp);
4625         }
4626 }
4627
/**
 * si_irq_disable - disable interrupt delivery and clear pending state
 *
 * @rdev: radeon_device pointer
 *
 * Disables the IH, waits 1 ms for in-flight interrupts to land,
 * acknowledges anything still pending, then masks every source.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
4636
/* Quiesce interrupts for suspend: disable the IH, then halt the RLC. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
4642
/* Full interrupt teardown: suspend the IH/RLC and free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
4648
4649 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4650 {
4651         u32 wptr, tmp;
4652
4653         if (rdev->wb.enabled)
4654                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4655         else
4656                 wptr = RREG32(IH_RB_WPTR);
4657
4658         if (wptr & RB_OVERFLOW) {
4659                 /* When a ring buffer overflow happen start parsing interrupt
4660                  * from the last not overwritten vector (wptr + 16). Hopefully
4661                  * this should allow us to catchup.
4662                  */
4663                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4664                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4665                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4666                 tmp = RREG32(IH_RB_CNTL);
4667                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4668                 WREG32(IH_RB_CNTL, tmp);
4669         }
4670         return (wptr & rdev->ih.ptr_mask);
4671 }
4672
4673 /*        SI IV Ring
4674  * Each IV ring entry is 128 bits:
4675  * [7:0]    - interrupt source id
4676  * [31:8]   - reserved
4677  * [59:32]  - interrupt source data
4678  * [63:60]  - reserved
4679  * [71:64]  - RINGID
4680  * [79:72]  - VMID
4681  * [127:80] - reserved
4682  */
4683 int si_irq_process(struct radeon_device *rdev)
4684 {
4685         u32 wptr;
4686         u32 rptr;
4687         u32 src_id, src_data, ring_id;
4688         u32 ring_index;
4689         bool queue_hotplug = false;
4690
4691         if (!rdev->ih.enabled || rdev->shutdown)
4692                 return IRQ_NONE;
4693
4694         wptr = si_get_ih_wptr(rdev);
4695
4696 restart_ih:
4697         /* is somebody else already processing irqs? */
4698         if (atomic_xchg(&rdev->ih.lock, 1))
4699                 return IRQ_NONE;
4700
4701         rptr = rdev->ih.rptr;
4702         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4703
4704         /* Order reading of wptr vs. reading of IH ring data */
4705         rmb();
4706
4707         /* display interrupts */
4708         si_irq_ack(rdev);
4709
4710         while (rptr != wptr) {
4711                 /* wptr/rptr are in bytes! */
4712                 ring_index = rptr / 4;
4713                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4714                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4715                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
4716
4717                 switch (src_id) {
4718                 case 1: /* D1 vblank/vline */
4719                         switch (src_data) {
4720                         case 0: /* D1 vblank */
4721                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
4722                                         if (rdev->irq.crtc_vblank_int[0]) {
4723                                                 drm_handle_vblank(rdev->ddev, 0);
4724                                                 rdev->pm.vblank_sync = true;
4725                                                 wake_up(&rdev->irq.vblank_queue);
4726                                         }
4727                                         if (atomic_read(&rdev->irq.pflip[0]))
4728                                                 radeon_crtc_handle_flip(rdev, 0);
4729                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4730                                         DRM_DEBUG("IH: D1 vblank\n");
4731                                 }
4732                                 break;
4733                         case 1: /* D1 vline */
4734                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
4735                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4736                                         DRM_DEBUG("IH: D1 vline\n");
4737                                 }
4738                                 break;
4739                         default:
4740                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4741                                 break;
4742                         }
4743                         break;
4744                 case 2: /* D2 vblank/vline */
4745                         switch (src_data) {
4746                         case 0: /* D2 vblank */
4747                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4748                                         if (rdev->irq.crtc_vblank_int[1]) {
4749                                                 drm_handle_vblank(rdev->ddev, 1);
4750                                                 rdev->pm.vblank_sync = true;
4751                                                 wake_up(&rdev->irq.vblank_queue);
4752                                         }
4753                                         if (atomic_read(&rdev->irq.pflip[1]))
4754                                                 radeon_crtc_handle_flip(rdev, 1);
4755                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4756                                         DRM_DEBUG("IH: D2 vblank\n");
4757                                 }
4758                                 break;
4759                         case 1: /* D2 vline */
4760                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4761                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4762                                         DRM_DEBUG("IH: D2 vline\n");
4763                                 }
4764                                 break;
4765                         default:
4766                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4767                                 break;
4768                         }
4769                         break;
4770                 case 3: /* D3 vblank/vline */
4771                         switch (src_data) {
4772                         case 0: /* D3 vblank */
4773                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4774                                         if (rdev->irq.crtc_vblank_int[2]) {
4775                                                 drm_handle_vblank(rdev->ddev, 2);
4776                                                 rdev->pm.vblank_sync = true;
4777                                                 wake_up(&rdev->irq.vblank_queue);
4778                                         }
4779                                         if (atomic_read(&rdev->irq.pflip[2]))
4780                                                 radeon_crtc_handle_flip(rdev, 2);
4781                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4782                                         DRM_DEBUG("IH: D3 vblank\n");
4783                                 }
4784                                 break;
4785                         case 1: /* D3 vline */
4786                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4787                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4788                                         DRM_DEBUG("IH: D3 vline\n");
4789                                 }
4790                                 break;
4791                         default:
4792                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4793                                 break;
4794                         }
4795                         break;
4796                 case 4: /* D4 vblank/vline */
4797                         switch (src_data) {
4798                         case 0: /* D4 vblank */
4799                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4800                                         if (rdev->irq.crtc_vblank_int[3]) {
4801                                                 drm_handle_vblank(rdev->ddev, 3);
4802                                                 rdev->pm.vblank_sync = true;
4803                                                 wake_up(&rdev->irq.vblank_queue);
4804                                         }
4805                                         if (atomic_read(&rdev->irq.pflip[3]))
4806                                                 radeon_crtc_handle_flip(rdev, 3);
4807                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4808                                         DRM_DEBUG("IH: D4 vblank\n");
4809                                 }
4810                                 break;
4811                         case 1: /* D4 vline */
4812                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4813                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4814                                         DRM_DEBUG("IH: D4 vline\n");
4815                                 }
4816                                 break;
4817                         default:
4818                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4819                                 break;
4820                         }
4821                         break;
4822                 case 5: /* D5 vblank/vline */
4823                         switch (src_data) {
4824                         case 0: /* D5 vblank */
4825                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4826                                         if (rdev->irq.crtc_vblank_int[4]) {
4827                                                 drm_handle_vblank(rdev->ddev, 4);
4828                                                 rdev->pm.vblank_sync = true;
4829                                                 wake_up(&rdev->irq.vblank_queue);
4830                                         }
4831                                         if (atomic_read(&rdev->irq.pflip[4]))
4832                                                 radeon_crtc_handle_flip(rdev, 4);
4833                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4834                                         DRM_DEBUG("IH: D5 vblank\n");
4835                                 }
4836                                 break;
4837                         case 1: /* D5 vline */
4838                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4839                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4840                                         DRM_DEBUG("IH: D5 vline\n");
4841                                 }
4842                                 break;
4843                         default:
4844                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4845                                 break;
4846                         }
4847                         break;
4848                 case 6: /* D6 vblank/vline */
4849                         switch (src_data) {
4850                         case 0: /* D6 vblank */
4851                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4852                                         if (rdev->irq.crtc_vblank_int[5]) {
4853                                                 drm_handle_vblank(rdev->ddev, 5);
4854                                                 rdev->pm.vblank_sync = true;
4855                                                 wake_up(&rdev->irq.vblank_queue);
4856                                         }
4857                                         if (atomic_read(&rdev->irq.pflip[5]))
4858                                                 radeon_crtc_handle_flip(rdev, 5);
4859                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4860                                         DRM_DEBUG("IH: D6 vblank\n");
4861                                 }
4862                                 break;
4863                         case 1: /* D6 vline */
4864                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4865                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4866                                         DRM_DEBUG("IH: D6 vline\n");
4867                                 }
4868                                 break;
4869                         default:
4870                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4871                                 break;
4872                         }
4873                         break;
4874                 case 42: /* HPD hotplug */
4875                         switch (src_data) {
4876                         case 0:
4877                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4878                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
4879                                         queue_hotplug = true;
4880                                         DRM_DEBUG("IH: HPD1\n");
4881                                 }
4882                                 break;
4883                         case 1:
4884                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4885                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4886                                         queue_hotplug = true;
4887                                         DRM_DEBUG("IH: HPD2\n");
4888                                 }
4889                                 break;
4890                         case 2:
4891                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4892                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4893                                         queue_hotplug = true;
4894                                         DRM_DEBUG("IH: HPD3\n");
4895                                 }
4896                                 break;
4897                         case 3:
4898                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4899                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4900                                         queue_hotplug = true;
4901                                         DRM_DEBUG("IH: HPD4\n");
4902                                 }
4903                                 break;
4904                         case 4:
4905                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4906                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4907                                         queue_hotplug = true;
4908                                         DRM_DEBUG("IH: HPD5\n");
4909                                 }
4910                                 break;
4911                         case 5:
4912                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4913                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4914                                         queue_hotplug = true;
4915                                         DRM_DEBUG("IH: HPD6\n");
4916                                 }
4917                                 break;
4918                         default:
4919                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4920                                 break;
4921                         }
4922                         break;
4923                 case 146:
4924                 case 147:
4925                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4926                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4927                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4928                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4929                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4930                         /* reset addr and status */
4931                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4932                         break;
4933                 case 176: /* RINGID0 CP_INT */
4934                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4935                         break;
4936                 case 177: /* RINGID1 CP_INT */
4937                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4938                         break;
4939                 case 178: /* RINGID2 CP_INT */
4940                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4941                         break;
4942                 case 181: /* CP EOP event */
4943                         DRM_DEBUG("IH: CP EOP\n");
4944                         switch (ring_id) {
4945                         case 0:
4946                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4947                                 break;
4948                         case 1:
4949                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4950                                 break;
4951                         case 2:
4952                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4953                                 break;
4954                         }
4955                         break;
4956                 case 224: /* DMA trap event */
4957                         DRM_DEBUG("IH: DMA trap\n");
4958                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4959                         break;
4960                 case 233: /* GUI IDLE */
4961                         DRM_DEBUG("IH: GUI idle\n");
4962                         break;
4963                 case 244: /* DMA trap event */
4964                         DRM_DEBUG("IH: DMA1 trap\n");
4965                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4966                         break;
4967                 default:
4968                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4969                         break;
4970                 }
4971
4972                 /* wptr/rptr are in bytes! */
4973                 rptr += 16;
4974                 rptr &= rdev->ih.ptr_mask;
4975         }
4976         if (queue_hotplug)
4977                 schedule_work(&rdev->hotplug_work);
4978         rdev->ih.rptr = rptr;
4979         WREG32(IH_RB_RPTR, rdev->ih.rptr);
4980         atomic_set(&rdev->ih.lock, 0);
4981
4982         /* make sure wptr hasn't changed while processing */
4983         wptr = si_get_ih_wptr(rdev);
4984         if (wptr != rptr)
4985                 goto restart_ih;
4986
4987         return IRQ_HANDLED;
4988 }
4989
4990 /**
4991  * si_copy_dma - copy pages using the DMA engine
4992  *
4993  * @rdev: radeon_device pointer
4994  * @src_offset: src GPU address
4995  * @dst_offset: dst GPU address
4996  * @num_gpu_pages: number of GPU pages to xfer
4997  * @fence: radeon fence object
4998  *
4999  * Copy GPU paging using the DMA engine (SI).
5000  * Used by the radeon ttm implementation to move pages if
5001  * registered as the asic copy callback.
5002  */
5003 int si_copy_dma(struct radeon_device *rdev,
5004                 uint64_t src_offset, uint64_t dst_offset,
5005                 unsigned num_gpu_pages,
5006                 struct radeon_fence **fence)
5007 {
5008         struct radeon_semaphore *sem = NULL;
5009         int ring_index = rdev->asic->copy.dma_ring_index;
5010         struct radeon_ring *ring = &rdev->ring[ring_index];
5011         u32 size_in_bytes, cur_size_in_bytes;
5012         int i, num_loops;
5013         int r = 0;
5014
5015         r = radeon_semaphore_create(rdev, &sem);
5016         if (r) {
5017                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5018                 return r;
5019         }
5020
5021         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5022         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5023         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5024         if (r) {
5025                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5026                 radeon_semaphore_free(rdev, &sem, NULL);
5027                 return r;
5028         }
5029
5030         if (radeon_fence_need_sync(*fence, ring->idx)) {
5031                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5032                                             ring->idx);
5033                 radeon_fence_note_sync(*fence, ring->idx);
5034         } else {
5035                 radeon_semaphore_free(rdev, &sem, NULL);
5036         }
5037
5038         for (i = 0; i < num_loops; i++) {
5039                 cur_size_in_bytes = size_in_bytes;
5040                 if (cur_size_in_bytes > 0xFFFFF)
5041                         cur_size_in_bytes = 0xFFFFF;
5042                 size_in_bytes -= cur_size_in_bytes;
5043                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5044                 radeon_ring_write(ring, dst_offset & 0xffffffff);
5045                 radeon_ring_write(ring, src_offset & 0xffffffff);
5046                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5047                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5048                 src_offset += cur_size_in_bytes;
5049                 dst_offset += cur_size_in_bytes;
5050         }
5051
5052         r = radeon_fence_emit(rdev, fence, ring->idx);
5053         if (r) {
5054                 radeon_ring_unlock_undo(rdev, ring);
5055                 return r;
5056         }
5057
5058         radeon_ring_unlock_commit(rdev, ring);
5059         radeon_semaphore_free(rdev, &sem, *fence);
5060
5061         return r;
5062 }
5063
5064 /*
5065  * startup/shutdown callbacks
5066  */
/**
 * si_startup - program the asic to a usable state
 *
 * @rdev: radeon_device pointer
 *
 * Called at init and resume time to bring the hw back up.  The order of
 * the steps below matters: microcode must be available before the MC and
 * CP are programmed, the writeback buffer must exist before fences and
 * rings are started, and interrupts must be enabled before the rings run.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* fetch PFP/ME/CE/RLC/MC microcode from userspace if not cached yet */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* program the memory controller and enable the GART before
	 * initializing the rest of the GPU
	 */
	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence drivers for the three CP and two DMA rings */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure disable its ring and carry on */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* a zero ring_size means the UVD resume above failed - skip it */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
5232
5233 int si_resume(struct radeon_device *rdev)
5234 {
5235         int r;
5236
5237         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
5238          * posting will perform necessary task to bring back GPU into good
5239          * shape.
5240          */
5241         /* post card */
5242         atom_asic_init(rdev->mode_info.atom_context);
5243
5244         /* init golden registers */
5245         si_init_golden_registers(rdev);
5246
5247         rdev->accel_working = true;
5248         r = si_startup(rdev);
5249         if (r) {
5250                 DRM_ERROR("si startup failed on resume\n");
5251                 rdev->accel_working = false;
5252                 return r;
5253         }
5254
5255         return r;
5256
5257 }
5258
/**
 * si_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the hw in rough reverse order of si_startup(): stop the VM
 * manager, CP, DMA and UVD engines, then disable interrupts, writeback
 * and finally the GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	r600_uvd_rbc_stop(rdev);
	radeon_uvd_suspend(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5271
5272 /* Plan is to move initialization in that function and use
5273  * helper function so that radeon_device_init pretty much
5274  * do nothing more than calling asic specific function. This
5275  * should also allow to remove a bunch of callback function
5276  * like vram_info.
5277  */
/**
 * si_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Reads and validates the (ATOM) BIOS, posts the card if needed,
 * initializes clocks, memory controller, rings and GART state and then
 * brings the hw up via si_startup().  If acceleration fails to start,
 * the accel state is torn down but the call can still return 0 so modeset
 * keeps working; a missing MC ucode is treated as fatal.
 * Returns 0 on success, negative error code on failure.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	r = radeon_irq_kms_init(rdev);
	if (r)
		return r;

	/* pre-size the three CP rings (1MB each) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	/* pre-size the two DMA rings (64KB each) */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD ring is only set up if the UVD block initializes */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		si_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
5394
/**
 * si_fini - asic specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything si_init() set up, roughly in reverse order:
 * engines and rings first, then interrupts, memory management, the
 * fence driver and finally the cached BIOS image.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* clear the pointer so nothing can use the freed BIOS copy */
	rdev->bios = NULL;
}
5415
5416 /**
5417  * si_get_gpu_clock_counter - return GPU clock counter snapshot
5418  *
5419  * @rdev: radeon_device pointer
5420  *
5421  * Fetches a GPU clock counter snapshot (SI).
5422  * Returns the 64 bit clock counter snapshot.
5423  */
5424 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5425 {
5426         uint64_t clock;
5427
5428         mutex_lock(&rdev->gpu_clock_mutex);
5429         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5430         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5431                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5432         mutex_unlock(&rdev->gpu_clock_mutex);
5433         return clock;
5434 }
5435
/**
 * si_set_uvd_clocks - program the UVD VCLK/DCLK PLL (SI)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock; 0 powers the PLL down
 * @dclk: requested UVD decode clock; 0 powers the PLL down
 *
 * Routes VCLK/DCLK to the bus clock, puts the UPLL into bypass,
 * then (unless either clock is 0) computes dividers, walks the PLL
 * through its reset/settle sequence and finally switches VCLK/DCLK
 * back to the PLL outputs.
 *
 * The register writes below form a hardware-mandated bring-up
 * sequence; do not reorder them.
 *
 * Returns 0 on success (including the power-down path), or the
 * error code from radeon_uvd_calc_upll_dividers() /
 * radeon_uvd_send_upll_ctlreq() on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* NOTE(review): the constants are presumably ref clock / VCO limits
	 * and divider ranges for the SI UPLL — confirm against the helper */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): ISPARE9 appears to select a PLL operating range based
	 * on the feedback divider threshold — TODO confirm meaning of 307200 */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}