drivers/gpu/drm/radeon/cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
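/*
 * Firmware images are declared under both the legacy (uppercase) and the
 * newer (lowercase) naming schemes; the microcode loader tries the new
 * names first and falls back to the legacy ones (see cik_init_microcode()).
 */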
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
45
46 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
47 MODULE_FIRMWARE("radeon/bonaire_me.bin");
48 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
49 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
51 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
53 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
54
55 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
56 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
64
65 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
66 MODULE_FIRMWARE("radeon/hawaii_me.bin");
67 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
68 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
70 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
72 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
73
74 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
75 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
80
81 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
82 MODULE_FIRMWARE("radeon/kaveri_me.bin");
83 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
84 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
86 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
87 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
88
89 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
90 MODULE_FIRMWARE("radeon/KABINI_me.bin");
91 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
92 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
93 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
94 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
95
96 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
97 MODULE_FIRMWARE("radeon/kabini_me.bin");
98 MODULE_FIRMWARE("radeon/kabini_ce.bin");
99 MODULE_FIRMWARE("radeon/kabini_mec.bin");
100 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
101 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
102
103 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
104 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
109
110 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
111 MODULE_FIRMWARE("radeon/mullins_me.bin");
112 MODULE_FIRMWARE("radeon/mullins_ce.bin");
113 MODULE_FIRMWARE("radeon/mullins_mec.bin");
114 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
115 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
116
117 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
118 extern void r600_ih_ring_fini(struct radeon_device *rdev);
119 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
120 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
121 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
122 extern void sumo_rlc_fini(struct radeon_device *rdev);
123 extern int sumo_rlc_init(struct radeon_device *rdev);
124 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
125 extern void si_rlc_reset(struct radeon_device *rdev);
126 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
127 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
128 extern int cik_sdma_resume(struct radeon_device *rdev);
129 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
130 extern void cik_sdma_fini(struct radeon_device *rdev);
131 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
132 static void cik_rlc_stop(struct radeon_device *rdev);
133 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
134 static void cik_program_aspm(struct radeon_device *rdev);
135 static void cik_init_pg(struct radeon_device *rdev);
136 static void cik_init_cg(struct radeon_device *rdev);
137 static void cik_fini_pg(struct radeon_device *rdev);
138 static void cik_fini_cg(struct radeon_device *rdev);
139 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
140                                           bool enable);
141
142 /* get temperature in millidegrees */
143 int ci_get_temp(struct radeon_device *rdev)
144 {
145         u32 temp;
146         int actual_temp = 0;
147
148         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
149                 CTF_TEMP_SHIFT;
150
151         if (temp & 0x200)
152                 actual_temp = 255;
153         else
154                 actual_temp = temp & 0x1ff;
155
156         actual_temp = actual_temp * 1000;
157
158         return actual_temp;
159 }
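/*
 * Worked example for ci_get_temp() above (illustrative): a CTF_TEMP field
 * of 61 (0x3d) reports 61 * 1000 = 61000 millidegrees C; readings with
 * bit 9 (0x200) set are clamped to the 255 C maximum before scaling.
 */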
160
161 /* get temperature in millidegrees */
162 int kv_get_temp(struct radeon_device *rdev)
163 {
164         u32 temp;
165         int actual_temp = 0;
166
167         temp = RREG32_SMC(0xC0300E0C);
168
169         if (temp)
170                 actual_temp = (temp / 8) - 49;
171         else
172                 actual_temp = 0;
173
174         actual_temp = actual_temp * 1000;
175
176         return actual_temp;
177 }
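/*
 * Worked example for kv_get_temp() above (illustrative): a raw SMC reading
 * of 600 yields (600 / 8) - 49 = 26, i.e. 26000 millidegrees C; a reading
 * of 0 is reported as 0 rather than -49 C.
 */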
178
179 /*
180  * Indirect registers accessor
181  */
182 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
183 {
184         unsigned long flags;
185         u32 r;
186
187         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
188         WREG32(PCIE_INDEX, reg);
189         (void)RREG32(PCIE_INDEX);
190         r = RREG32(PCIE_DATA);
191         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
192         return r;
193 }
194
195 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
196 {
197         unsigned long flags;
198
199         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
200         WREG32(PCIE_INDEX, reg);
201         (void)RREG32(PCIE_INDEX);
202         WREG32(PCIE_DATA, v);
203         (void)RREG32(PCIE_DATA);
204         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
205 }
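/*
 * Usage sketch (illustrative; register/bit names are assumed examples):
 * PCIE port registers are reached through the PCIE_INDEX/PCIE_DATA pair,
 * normally via the RREG32_PCIE_PORT()/WREG32_PCIE_PORT() wrappers, e.g.
 *
 *     u32 tmp = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 *     WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, tmp | LC_GEN3_EN_STRAP);
 *
 * The dummy readbacks of PCIE_INDEX/PCIE_DATA flush the posted writes, and
 * pciep_idx_lock keeps each index/data sequence atomic against concurrent
 * accessors.
 */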
206
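/*
 * RLC save/restore register lists.  Entries generally pair a select value
 * in the upper 16 bits with a dword register offset (byte offset >> 2) in
 * the lower 16 bits, followed by a placeholder data word; the lists are
 * handed to the RLC when the save/restore buffer is set up (see
 * sumo_rlc_init() declared above).
 */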
207 static const u32 spectre_rlc_save_restore_register_list[] =
208 {
209         (0x0e00 << 16) | (0xc12c >> 2),
210         0x00000000,
211         (0x0e00 << 16) | (0xc140 >> 2),
212         0x00000000,
213         (0x0e00 << 16) | (0xc150 >> 2),
214         0x00000000,
215         (0x0e00 << 16) | (0xc15c >> 2),
216         0x00000000,
217         (0x0e00 << 16) | (0xc168 >> 2),
218         0x00000000,
219         (0x0e00 << 16) | (0xc170 >> 2),
220         0x00000000,
221         (0x0e00 << 16) | (0xc178 >> 2),
222         0x00000000,
223         (0x0e00 << 16) | (0xc204 >> 2),
224         0x00000000,
225         (0x0e00 << 16) | (0xc2b4 >> 2),
226         0x00000000,
227         (0x0e00 << 16) | (0xc2b8 >> 2),
228         0x00000000,
229         (0x0e00 << 16) | (0xc2bc >> 2),
230         0x00000000,
231         (0x0e00 << 16) | (0xc2c0 >> 2),
232         0x00000000,
233         (0x0e00 << 16) | (0x8228 >> 2),
234         0x00000000,
235         (0x0e00 << 16) | (0x829c >> 2),
236         0x00000000,
237         (0x0e00 << 16) | (0x869c >> 2),
238         0x00000000,
239         (0x0600 << 16) | (0x98f4 >> 2),
240         0x00000000,
241         (0x0e00 << 16) | (0x98f8 >> 2),
242         0x00000000,
243         (0x0e00 << 16) | (0x9900 >> 2),
244         0x00000000,
245         (0x0e00 << 16) | (0xc260 >> 2),
246         0x00000000,
247         (0x0e00 << 16) | (0x90e8 >> 2),
248         0x00000000,
249         (0x0e00 << 16) | (0x3c000 >> 2),
250         0x00000000,
251         (0x0e00 << 16) | (0x3c00c >> 2),
252         0x00000000,
253         (0x0e00 << 16) | (0x8c1c >> 2),
254         0x00000000,
255         (0x0e00 << 16) | (0x9700 >> 2),
256         0x00000000,
257         (0x0e00 << 16) | (0xcd20 >> 2),
258         0x00000000,
259         (0x4e00 << 16) | (0xcd20 >> 2),
260         0x00000000,
261         (0x5e00 << 16) | (0xcd20 >> 2),
262         0x00000000,
263         (0x6e00 << 16) | (0xcd20 >> 2),
264         0x00000000,
265         (0x7e00 << 16) | (0xcd20 >> 2),
266         0x00000000,
267         (0x8e00 << 16) | (0xcd20 >> 2),
268         0x00000000,
269         (0x9e00 << 16) | (0xcd20 >> 2),
270         0x00000000,
271         (0xae00 << 16) | (0xcd20 >> 2),
272         0x00000000,
273         (0xbe00 << 16) | (0xcd20 >> 2),
274         0x00000000,
275         (0x0e00 << 16) | (0x89bc >> 2),
276         0x00000000,
277         (0x0e00 << 16) | (0x8900 >> 2),
278         0x00000000,
279         0x3,
280         (0x0e00 << 16) | (0xc130 >> 2),
281         0x00000000,
282         (0x0e00 << 16) | (0xc134 >> 2),
283         0x00000000,
284         (0x0e00 << 16) | (0xc1fc >> 2),
285         0x00000000,
286         (0x0e00 << 16) | (0xc208 >> 2),
287         0x00000000,
288         (0x0e00 << 16) | (0xc264 >> 2),
289         0x00000000,
290         (0x0e00 << 16) | (0xc268 >> 2),
291         0x00000000,
292         (0x0e00 << 16) | (0xc26c >> 2),
293         0x00000000,
294         (0x0e00 << 16) | (0xc270 >> 2),
295         0x00000000,
296         (0x0e00 << 16) | (0xc274 >> 2),
297         0x00000000,
298         (0x0e00 << 16) | (0xc278 >> 2),
299         0x00000000,
300         (0x0e00 << 16) | (0xc27c >> 2),
301         0x00000000,
302         (0x0e00 << 16) | (0xc280 >> 2),
303         0x00000000,
304         (0x0e00 << 16) | (0xc284 >> 2),
305         0x00000000,
306         (0x0e00 << 16) | (0xc288 >> 2),
307         0x00000000,
308         (0x0e00 << 16) | (0xc28c >> 2),
309         0x00000000,
310         (0x0e00 << 16) | (0xc290 >> 2),
311         0x00000000,
312         (0x0e00 << 16) | (0xc294 >> 2),
313         0x00000000,
314         (0x0e00 << 16) | (0xc298 >> 2),
315         0x00000000,
316         (0x0e00 << 16) | (0xc29c >> 2),
317         0x00000000,
318         (0x0e00 << 16) | (0xc2a0 >> 2),
319         0x00000000,
320         (0x0e00 << 16) | (0xc2a4 >> 2),
321         0x00000000,
322         (0x0e00 << 16) | (0xc2a8 >> 2),
323         0x00000000,
324         (0x0e00 << 16) | (0xc2ac >> 2),
325         0x00000000,
326         (0x0e00 << 16) | (0xc2b0 >> 2),
327         0x00000000,
328         (0x0e00 << 16) | (0x301d0 >> 2),
329         0x00000000,
330         (0x0e00 << 16) | (0x30238 >> 2),
331         0x00000000,
332         (0x0e00 << 16) | (0x30250 >> 2),
333         0x00000000,
334         (0x0e00 << 16) | (0x30254 >> 2),
335         0x00000000,
336         (0x0e00 << 16) | (0x30258 >> 2),
337         0x00000000,
338         (0x0e00 << 16) | (0x3025c >> 2),
339         0x00000000,
340         (0x4e00 << 16) | (0xc900 >> 2),
341         0x00000000,
342         (0x5e00 << 16) | (0xc900 >> 2),
343         0x00000000,
344         (0x6e00 << 16) | (0xc900 >> 2),
345         0x00000000,
346         (0x7e00 << 16) | (0xc900 >> 2),
347         0x00000000,
348         (0x8e00 << 16) | (0xc900 >> 2),
349         0x00000000,
350         (0x9e00 << 16) | (0xc900 >> 2),
351         0x00000000,
352         (0xae00 << 16) | (0xc900 >> 2),
353         0x00000000,
354         (0xbe00 << 16) | (0xc900 >> 2),
355         0x00000000,
356         (0x4e00 << 16) | (0xc904 >> 2),
357         0x00000000,
358         (0x5e00 << 16) | (0xc904 >> 2),
359         0x00000000,
360         (0x6e00 << 16) | (0xc904 >> 2),
361         0x00000000,
362         (0x7e00 << 16) | (0xc904 >> 2),
363         0x00000000,
364         (0x8e00 << 16) | (0xc904 >> 2),
365         0x00000000,
366         (0x9e00 << 16) | (0xc904 >> 2),
367         0x00000000,
368         (0xae00 << 16) | (0xc904 >> 2),
369         0x00000000,
370         (0xbe00 << 16) | (0xc904 >> 2),
371         0x00000000,
372         (0x4e00 << 16) | (0xc908 >> 2),
373         0x00000000,
374         (0x5e00 << 16) | (0xc908 >> 2),
375         0x00000000,
376         (0x6e00 << 16) | (0xc908 >> 2),
377         0x00000000,
378         (0x7e00 << 16) | (0xc908 >> 2),
379         0x00000000,
380         (0x8e00 << 16) | (0xc908 >> 2),
381         0x00000000,
382         (0x9e00 << 16) | (0xc908 >> 2),
383         0x00000000,
384         (0xae00 << 16) | (0xc908 >> 2),
385         0x00000000,
386         (0xbe00 << 16) | (0xc908 >> 2),
387         0x00000000,
388         (0x4e00 << 16) | (0xc90c >> 2),
389         0x00000000,
390         (0x5e00 << 16) | (0xc90c >> 2),
391         0x00000000,
392         (0x6e00 << 16) | (0xc90c >> 2),
393         0x00000000,
394         (0x7e00 << 16) | (0xc90c >> 2),
395         0x00000000,
396         (0x8e00 << 16) | (0xc90c >> 2),
397         0x00000000,
398         (0x9e00 << 16) | (0xc90c >> 2),
399         0x00000000,
400         (0xae00 << 16) | (0xc90c >> 2),
401         0x00000000,
402         (0xbe00 << 16) | (0xc90c >> 2),
403         0x00000000,
404         (0x4e00 << 16) | (0xc910 >> 2),
405         0x00000000,
406         (0x5e00 << 16) | (0xc910 >> 2),
407         0x00000000,
408         (0x6e00 << 16) | (0xc910 >> 2),
409         0x00000000,
410         (0x7e00 << 16) | (0xc910 >> 2),
411         0x00000000,
412         (0x8e00 << 16) | (0xc910 >> 2),
413         0x00000000,
414         (0x9e00 << 16) | (0xc910 >> 2),
415         0x00000000,
416         (0xae00 << 16) | (0xc910 >> 2),
417         0x00000000,
418         (0xbe00 << 16) | (0xc910 >> 2),
419         0x00000000,
420         (0x0e00 << 16) | (0xc99c >> 2),
421         0x00000000,
422         (0x0e00 << 16) | (0x9834 >> 2),
423         0x00000000,
424         (0x0000 << 16) | (0x30f00 >> 2),
425         0x00000000,
426         (0x0001 << 16) | (0x30f00 >> 2),
427         0x00000000,
428         (0x0000 << 16) | (0x30f04 >> 2),
429         0x00000000,
430         (0x0001 << 16) | (0x30f04 >> 2),
431         0x00000000,
432         (0x0000 << 16) | (0x30f08 >> 2),
433         0x00000000,
434         (0x0001 << 16) | (0x30f08 >> 2),
435         0x00000000,
436         (0x0000 << 16) | (0x30f0c >> 2),
437         0x00000000,
438         (0x0001 << 16) | (0x30f0c >> 2),
439         0x00000000,
440         (0x0600 << 16) | (0x9b7c >> 2),
441         0x00000000,
442         (0x0e00 << 16) | (0x8a14 >> 2),
443         0x00000000,
444         (0x0e00 << 16) | (0x8a18 >> 2),
445         0x00000000,
446         (0x0600 << 16) | (0x30a00 >> 2),
447         0x00000000,
448         (0x0e00 << 16) | (0x8bf0 >> 2),
449         0x00000000,
450         (0x0e00 << 16) | (0x8bcc >> 2),
451         0x00000000,
452         (0x0e00 << 16) | (0x8b24 >> 2),
453         0x00000000,
454         (0x0e00 << 16) | (0x30a04 >> 2),
455         0x00000000,
456         (0x0600 << 16) | (0x30a10 >> 2),
457         0x00000000,
458         (0x0600 << 16) | (0x30a14 >> 2),
459         0x00000000,
460         (0x0600 << 16) | (0x30a18 >> 2),
461         0x00000000,
462         (0x0600 << 16) | (0x30a2c >> 2),
463         0x00000000,
464         (0x0e00 << 16) | (0xc700 >> 2),
465         0x00000000,
466         (0x0e00 << 16) | (0xc704 >> 2),
467         0x00000000,
468         (0x0e00 << 16) | (0xc708 >> 2),
469         0x00000000,
470         (0x0e00 << 16) | (0xc768 >> 2),
471         0x00000000,
472         (0x0400 << 16) | (0xc770 >> 2),
473         0x00000000,
474         (0x0400 << 16) | (0xc774 >> 2),
475         0x00000000,
476         (0x0400 << 16) | (0xc778 >> 2),
477         0x00000000,
478         (0x0400 << 16) | (0xc77c >> 2),
479         0x00000000,
480         (0x0400 << 16) | (0xc780 >> 2),
481         0x00000000,
482         (0x0400 << 16) | (0xc784 >> 2),
483         0x00000000,
484         (0x0400 << 16) | (0xc788 >> 2),
485         0x00000000,
486         (0x0400 << 16) | (0xc78c >> 2),
487         0x00000000,
488         (0x0400 << 16) | (0xc798 >> 2),
489         0x00000000,
490         (0x0400 << 16) | (0xc79c >> 2),
491         0x00000000,
492         (0x0400 << 16) | (0xc7a0 >> 2),
493         0x00000000,
494         (0x0400 << 16) | (0xc7a4 >> 2),
495         0x00000000,
496         (0x0400 << 16) | (0xc7a8 >> 2),
497         0x00000000,
498         (0x0400 << 16) | (0xc7ac >> 2),
499         0x00000000,
500         (0x0400 << 16) | (0xc7b0 >> 2),
501         0x00000000,
502         (0x0400 << 16) | (0xc7b4 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0x9100 >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0x3c010 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0x92a8 >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0x92ac >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0x92b4 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0x92b8 >> 2),
515         0x00000000,
516         (0x0e00 << 16) | (0x92bc >> 2),
517         0x00000000,
518         (0x0e00 << 16) | (0x92c0 >> 2),
519         0x00000000,
520         (0x0e00 << 16) | (0x92c4 >> 2),
521         0x00000000,
522         (0x0e00 << 16) | (0x92c8 >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0x92cc >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0x92d0 >> 2),
527         0x00000000,
528         (0x0e00 << 16) | (0x8c00 >> 2),
529         0x00000000,
530         (0x0e00 << 16) | (0x8c04 >> 2),
531         0x00000000,
532         (0x0e00 << 16) | (0x8c20 >> 2),
533         0x00000000,
534         (0x0e00 << 16) | (0x8c38 >> 2),
535         0x00000000,
536         (0x0e00 << 16) | (0x8c3c >> 2),
537         0x00000000,
538         (0x0e00 << 16) | (0xae00 >> 2),
539         0x00000000,
540         (0x0e00 << 16) | (0x9604 >> 2),
541         0x00000000,
542         (0x0e00 << 16) | (0xac08 >> 2),
543         0x00000000,
544         (0x0e00 << 16) | (0xac0c >> 2),
545         0x00000000,
546         (0x0e00 << 16) | (0xac10 >> 2),
547         0x00000000,
548         (0x0e00 << 16) | (0xac14 >> 2),
549         0x00000000,
550         (0x0e00 << 16) | (0xac58 >> 2),
551         0x00000000,
552         (0x0e00 << 16) | (0xac68 >> 2),
553         0x00000000,
554         (0x0e00 << 16) | (0xac6c >> 2),
555         0x00000000,
556         (0x0e00 << 16) | (0xac70 >> 2),
557         0x00000000,
558         (0x0e00 << 16) | (0xac74 >> 2),
559         0x00000000,
560         (0x0e00 << 16) | (0xac78 >> 2),
561         0x00000000,
562         (0x0e00 << 16) | (0xac7c >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0xac80 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0xac84 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0xac88 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0xac8c >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x970c >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x9714 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x9718 >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x971c >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x31068 >> 2),
581         0x00000000,
582         (0x4e00 << 16) | (0x31068 >> 2),
583         0x00000000,
584         (0x5e00 << 16) | (0x31068 >> 2),
585         0x00000000,
586         (0x6e00 << 16) | (0x31068 >> 2),
587         0x00000000,
588         (0x7e00 << 16) | (0x31068 >> 2),
589         0x00000000,
590         (0x8e00 << 16) | (0x31068 >> 2),
591         0x00000000,
592         (0x9e00 << 16) | (0x31068 >> 2),
593         0x00000000,
594         (0xae00 << 16) | (0x31068 >> 2),
595         0x00000000,
596         (0xbe00 << 16) | (0x31068 >> 2),
597         0x00000000,
598         (0x0e00 << 16) | (0xcd10 >> 2),
599         0x00000000,
600         (0x0e00 << 16) | (0xcd14 >> 2),
601         0x00000000,
602         (0x0e00 << 16) | (0x88b0 >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0x88b4 >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0x88b8 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0x88bc >> 2),
609         0x00000000,
610         (0x0400 << 16) | (0x89c0 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0x88c4 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0x88c8 >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0x88d0 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0x88d4 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0x88d8 >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0x8980 >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0x30938 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0x3093c >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0x30940 >> 2),
629         0x00000000,
630         (0x0e00 << 16) | (0x89a0 >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x30900 >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x30904 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0x89b4 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x3c210 >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x3c214 >> 2),
641         0x00000000,
642         (0x0e00 << 16) | (0x3c218 >> 2),
643         0x00000000,
644         (0x0e00 << 16) | (0x8904 >> 2),
645         0x00000000,
646         0x5,
647         (0x0e00 << 16) | (0x8c28 >> 2),
648         (0x0e00 << 16) | (0x8c2c >> 2),
649         (0x0e00 << 16) | (0x8c30 >> 2),
650         (0x0e00 << 16) | (0x8c34 >> 2),
651         (0x0e00 << 16) | (0x9600 >> 2),
652 };
653
654 static const u32 kalindi_rlc_save_restore_register_list[] =
655 {
656         (0x0e00 << 16) | (0xc12c >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0xc140 >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0xc150 >> 2),
661         0x00000000,
662         (0x0e00 << 16) | (0xc15c >> 2),
663         0x00000000,
664         (0x0e00 << 16) | (0xc168 >> 2),
665         0x00000000,
666         (0x0e00 << 16) | (0xc170 >> 2),
667         0x00000000,
668         (0x0e00 << 16) | (0xc204 >> 2),
669         0x00000000,
670         (0x0e00 << 16) | (0xc2b4 >> 2),
671         0x00000000,
672         (0x0e00 << 16) | (0xc2b8 >> 2),
673         0x00000000,
674         (0x0e00 << 16) | (0xc2bc >> 2),
675         0x00000000,
676         (0x0e00 << 16) | (0xc2c0 >> 2),
677         0x00000000,
678         (0x0e00 << 16) | (0x8228 >> 2),
679         0x00000000,
680         (0x0e00 << 16) | (0x829c >> 2),
681         0x00000000,
682         (0x0e00 << 16) | (0x869c >> 2),
683         0x00000000,
684         (0x0600 << 16) | (0x98f4 >> 2),
685         0x00000000,
686         (0x0e00 << 16) | (0x98f8 >> 2),
687         0x00000000,
688         (0x0e00 << 16) | (0x9900 >> 2),
689         0x00000000,
690         (0x0e00 << 16) | (0xc260 >> 2),
691         0x00000000,
692         (0x0e00 << 16) | (0x90e8 >> 2),
693         0x00000000,
694         (0x0e00 << 16) | (0x3c000 >> 2),
695         0x00000000,
696         (0x0e00 << 16) | (0x3c00c >> 2),
697         0x00000000,
698         (0x0e00 << 16) | (0x8c1c >> 2),
699         0x00000000,
700         (0x0e00 << 16) | (0x9700 >> 2),
701         0x00000000,
702         (0x0e00 << 16) | (0xcd20 >> 2),
703         0x00000000,
704         (0x4e00 << 16) | (0xcd20 >> 2),
705         0x00000000,
706         (0x5e00 << 16) | (0xcd20 >> 2),
707         0x00000000,
708         (0x6e00 << 16) | (0xcd20 >> 2),
709         0x00000000,
710         (0x7e00 << 16) | (0xcd20 >> 2),
711         0x00000000,
712         (0x0e00 << 16) | (0x89bc >> 2),
713         0x00000000,
714         (0x0e00 << 16) | (0x8900 >> 2),
715         0x00000000,
716         0x3,
717         (0x0e00 << 16) | (0xc130 >> 2),
718         0x00000000,
719         (0x0e00 << 16) | (0xc134 >> 2),
720         0x00000000,
721         (0x0e00 << 16) | (0xc1fc >> 2),
722         0x00000000,
723         (0x0e00 << 16) | (0xc208 >> 2),
724         0x00000000,
725         (0x0e00 << 16) | (0xc264 >> 2),
726         0x00000000,
727         (0x0e00 << 16) | (0xc268 >> 2),
728         0x00000000,
729         (0x0e00 << 16) | (0xc26c >> 2),
730         0x00000000,
731         (0x0e00 << 16) | (0xc270 >> 2),
732         0x00000000,
733         (0x0e00 << 16) | (0xc274 >> 2),
734         0x00000000,
735         (0x0e00 << 16) | (0xc28c >> 2),
736         0x00000000,
737         (0x0e00 << 16) | (0xc290 >> 2),
738         0x00000000,
739         (0x0e00 << 16) | (0xc294 >> 2),
740         0x00000000,
741         (0x0e00 << 16) | (0xc298 >> 2),
742         0x00000000,
743         (0x0e00 << 16) | (0xc2a0 >> 2),
744         0x00000000,
745         (0x0e00 << 16) | (0xc2a4 >> 2),
746         0x00000000,
747         (0x0e00 << 16) | (0xc2a8 >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0xc2ac >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x301d0 >> 2),
752         0x00000000,
753         (0x0e00 << 16) | (0x30238 >> 2),
754         0x00000000,
755         (0x0e00 << 16) | (0x30250 >> 2),
756         0x00000000,
757         (0x0e00 << 16) | (0x30254 >> 2),
758         0x00000000,
759         (0x0e00 << 16) | (0x30258 >> 2),
760         0x00000000,
761         (0x0e00 << 16) | (0x3025c >> 2),
762         0x00000000,
763         (0x4e00 << 16) | (0xc900 >> 2),
764         0x00000000,
765         (0x5e00 << 16) | (0xc900 >> 2),
766         0x00000000,
767         (0x6e00 << 16) | (0xc900 >> 2),
768         0x00000000,
769         (0x7e00 << 16) | (0xc900 >> 2),
770         0x00000000,
771         (0x4e00 << 16) | (0xc904 >> 2),
772         0x00000000,
773         (0x5e00 << 16) | (0xc904 >> 2),
774         0x00000000,
775         (0x6e00 << 16) | (0xc904 >> 2),
776         0x00000000,
777         (0x7e00 << 16) | (0xc904 >> 2),
778         0x00000000,
779         (0x4e00 << 16) | (0xc908 >> 2),
780         0x00000000,
781         (0x5e00 << 16) | (0xc908 >> 2),
782         0x00000000,
783         (0x6e00 << 16) | (0xc908 >> 2),
784         0x00000000,
785         (0x7e00 << 16) | (0xc908 >> 2),
786         0x00000000,
787         (0x4e00 << 16) | (0xc90c >> 2),
788         0x00000000,
789         (0x5e00 << 16) | (0xc90c >> 2),
790         0x00000000,
791         (0x6e00 << 16) | (0xc90c >> 2),
792         0x00000000,
793         (0x7e00 << 16) | (0xc90c >> 2),
794         0x00000000,
795         (0x4e00 << 16) | (0xc910 >> 2),
796         0x00000000,
797         (0x5e00 << 16) | (0xc910 >> 2),
798         0x00000000,
799         (0x6e00 << 16) | (0xc910 >> 2),
800         0x00000000,
801         (0x7e00 << 16) | (0xc910 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc99c >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0x9834 >> 2),
806         0x00000000,
807         (0x0000 << 16) | (0x30f00 >> 2),
808         0x00000000,
809         (0x0000 << 16) | (0x30f04 >> 2),
810         0x00000000,
811         (0x0000 << 16) | (0x30f08 >> 2),
812         0x00000000,
813         (0x0000 << 16) | (0x30f0c >> 2),
814         0x00000000,
815         (0x0600 << 16) | (0x9b7c >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x8a14 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x8a18 >> 2),
820         0x00000000,
821         (0x0600 << 16) | (0x30a00 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0x8bf0 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0x8bcc >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0x8b24 >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0x30a04 >> 2),
830         0x00000000,
831         (0x0600 << 16) | (0x30a10 >> 2),
832         0x00000000,
833         (0x0600 << 16) | (0x30a14 >> 2),
834         0x00000000,
835         (0x0600 << 16) | (0x30a18 >> 2),
836         0x00000000,
837         (0x0600 << 16) | (0x30a2c >> 2),
838         0x00000000,
839         (0x0e00 << 16) | (0xc700 >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0xc704 >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0xc708 >> 2),
844         0x00000000,
845         (0x0e00 << 16) | (0xc768 >> 2),
846         0x00000000,
847         (0x0400 << 16) | (0xc770 >> 2),
848         0x00000000,
849         (0x0400 << 16) | (0xc774 >> 2),
850         0x00000000,
851         (0x0400 << 16) | (0xc798 >> 2),
852         0x00000000,
853         (0x0400 << 16) | (0xc79c >> 2),
854         0x00000000,
855         (0x0e00 << 16) | (0x9100 >> 2),
856         0x00000000,
857         (0x0e00 << 16) | (0x3c010 >> 2),
858         0x00000000,
859         (0x0e00 << 16) | (0x8c00 >> 2),
860         0x00000000,
861         (0x0e00 << 16) | (0x8c04 >> 2),
862         0x00000000,
863         (0x0e00 << 16) | (0x8c20 >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0x8c38 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0x8c3c >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0xae00 >> 2),
870         0x00000000,
871         (0x0e00 << 16) | (0x9604 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0xac08 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0xac0c >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0xac10 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0xac14 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0xac58 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0xac68 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0xac6c >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0xac70 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0xac74 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0xac78 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0xac7c >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0xac80 >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0xac84 >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0xac88 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xac8c >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0x970c >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x9714 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x9718 >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0x971c >> 2),
910         0x00000000,
911         (0x0e00 << 16) | (0x31068 >> 2),
912         0x00000000,
913         (0x4e00 << 16) | (0x31068 >> 2),
914         0x00000000,
915         (0x5e00 << 16) | (0x31068 >> 2),
916         0x00000000,
917         (0x6e00 << 16) | (0x31068 >> 2),
918         0x00000000,
919         (0x7e00 << 16) | (0x31068 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0xcd10 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0xcd14 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x88b0 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x88b4 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0x88b8 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x88bc >> 2),
932         0x00000000,
933         (0x0400 << 16) | (0x89c0 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0x88c4 >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0x88c8 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0x88d0 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0x88d4 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0x88d8 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0x8980 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0x30938 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0x3093c >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0x30940 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0x89a0 >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0x30900 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0x30904 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0x89b4 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0x3e1fc >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0x3c210 >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x3c214 >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x3c218 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x8904 >> 2),
970         0x00000000,
971         0x5,
972         (0x0e00 << 16) | (0x8c28 >> 2),
973         (0x0e00 << 16) | (0x8c2c >> 2),
974         (0x0e00 << 16) | (0x8c30 >> 2),
975         (0x0e00 << 16) | (0x8c34 >> 2),
976         (0x0e00 << 16) | (0x9600 >> 2),
977 };
978
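/*
 * The "golden" register tables below are triples of { offset, and_mask,
 * value } consumed by radeon_program_register_sequence(): for each triple
 * the bits covered by and_mask are cleared in the current register
 * contents and replaced with the new value; an and_mask of 0xffffffff
 * writes the value out directly.
 */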
979 static const u32 bonaire_golden_spm_registers[] =
980 {
981         0x30800, 0xe0ffffff, 0xe0000000
982 };
983
984 static const u32 bonaire_golden_common_registers[] =
985 {
986         0xc770, 0xffffffff, 0x00000800,
987         0xc774, 0xffffffff, 0x00000800,
988         0xc798, 0xffffffff, 0x00007fbf,
989         0xc79c, 0xffffffff, 0x00007faf
990 };
991
992 static const u32 bonaire_golden_registers[] =
993 {
994         0x3354, 0x00000333, 0x00000333,
995         0x3350, 0x000c0fc0, 0x00040200,
996         0x9a10, 0x00010000, 0x00058208,
997         0x3c000, 0xffff1fff, 0x00140000,
998         0x3c200, 0xfdfc0fff, 0x00000100,
999         0x3c234, 0x40000000, 0x40000200,
1000         0x9830, 0xffffffff, 0x00000000,
1001         0x9834, 0xf00fffff, 0x00000400,
1002         0x9838, 0x0002021c, 0x00020200,
1003         0xc78, 0x00000080, 0x00000000,
1004         0x5bb0, 0x000000f0, 0x00000070,
1005         0x5bc0, 0xf0311fff, 0x80300000,
1006         0x98f8, 0x73773777, 0x12010001,
1007         0x350c, 0x00810000, 0x408af000,
1008         0x7030, 0x31000111, 0x00000011,
1009         0x2f48, 0x73773777, 0x12010001,
1010         0x220c, 0x00007fb6, 0x0021a1b1,
1011         0x2210, 0x00007fb6, 0x002021b1,
1012         0x2180, 0x00007fb6, 0x00002191,
1013         0x2218, 0x00007fb6, 0x002121b1,
1014         0x221c, 0x00007fb6, 0x002021b1,
1015         0x21dc, 0x00007fb6, 0x00002191,
1016         0x21e0, 0x00007fb6, 0x00002191,
1017         0x3628, 0x0000003f, 0x0000000a,
1018         0x362c, 0x0000003f, 0x0000000a,
1019         0x2ae4, 0x00073ffe, 0x000022a2,
1020         0x240c, 0x000007ff, 0x00000000,
1021         0x8a14, 0xf000003f, 0x00000007,
1022         0x8bf0, 0x00002001, 0x00000001,
1023         0x8b24, 0xffffffff, 0x00ffffff,
1024         0x30a04, 0x0000ff0f, 0x00000000,
1025         0x28a4c, 0x07ffffff, 0x06000000,
1026         0x4d8, 0x00000fff, 0x00000100,
1027         0x3e78, 0x00000001, 0x00000002,
1028         0x9100, 0x03000000, 0x0362c688,
1029         0x8c00, 0x000000ff, 0x00000001,
1030         0xe40, 0x00001fff, 0x00001fff,
1031         0x9060, 0x0000007f, 0x00000020,
1032         0x9508, 0x00010000, 0x00010000,
1033         0xac14, 0x000003ff, 0x000000f3,
1034         0xac0c, 0xffffffff, 0x00001032
1035 };
1036
1037 static const u32 bonaire_mgcg_cgcg_init[] =
1038 {
1039         0xc420, 0xffffffff, 0xfffffffc,
1040         0x30800, 0xffffffff, 0xe0000000,
1041         0x3c2a0, 0xffffffff, 0x00000100,
1042         0x3c208, 0xffffffff, 0x00000100,
1043         0x3c2c0, 0xffffffff, 0xc0000100,
1044         0x3c2c8, 0xffffffff, 0xc0000100,
1045         0x3c2c4, 0xffffffff, 0xc0000100,
1046         0x55e4, 0xffffffff, 0x00600100,
1047         0x3c280, 0xffffffff, 0x00000100,
1048         0x3c214, 0xffffffff, 0x06000100,
1049         0x3c220, 0xffffffff, 0x00000100,
1050         0x3c218, 0xffffffff, 0x06000100,
1051         0x3c204, 0xffffffff, 0x00000100,
1052         0x3c2e0, 0xffffffff, 0x00000100,
1053         0x3c224, 0xffffffff, 0x00000100,
1054         0x3c200, 0xffffffff, 0x00000100,
1055         0x3c230, 0xffffffff, 0x00000100,
1056         0x3c234, 0xffffffff, 0x00000100,
1057         0x3c250, 0xffffffff, 0x00000100,
1058         0x3c254, 0xffffffff, 0x00000100,
1059         0x3c258, 0xffffffff, 0x00000100,
1060         0x3c25c, 0xffffffff, 0x00000100,
1061         0x3c260, 0xffffffff, 0x00000100,
1062         0x3c27c, 0xffffffff, 0x00000100,
1063         0x3c278, 0xffffffff, 0x00000100,
1064         0x3c210, 0xffffffff, 0x06000100,
1065         0x3c290, 0xffffffff, 0x00000100,
1066         0x3c274, 0xffffffff, 0x00000100,
1067         0x3c2b4, 0xffffffff, 0x00000100,
1068         0x3c2b0, 0xffffffff, 0x00000100,
1069         0x3c270, 0xffffffff, 0x00000100,
1070         0x30800, 0xffffffff, 0xe0000000,
1071         0x3c020, 0xffffffff, 0x00010000,
1072         0x3c024, 0xffffffff, 0x00030002,
1073         0x3c028, 0xffffffff, 0x00040007,
1074         0x3c02c, 0xffffffff, 0x00060005,
1075         0x3c030, 0xffffffff, 0x00090008,
1076         0x3c034, 0xffffffff, 0x00010000,
1077         0x3c038, 0xffffffff, 0x00030002,
1078         0x3c03c, 0xffffffff, 0x00040007,
1079         0x3c040, 0xffffffff, 0x00060005,
1080         0x3c044, 0xffffffff, 0x00090008,
1081         0x3c048, 0xffffffff, 0x00010000,
1082         0x3c04c, 0xffffffff, 0x00030002,
1083         0x3c050, 0xffffffff, 0x00040007,
1084         0x3c054, 0xffffffff, 0x00060005,
1085         0x3c058, 0xffffffff, 0x00090008,
1086         0x3c05c, 0xffffffff, 0x00010000,
1087         0x3c060, 0xffffffff, 0x00030002,
1088         0x3c064, 0xffffffff, 0x00040007,
1089         0x3c068, 0xffffffff, 0x00060005,
1090         0x3c06c, 0xffffffff, 0x00090008,
1091         0x3c070, 0xffffffff, 0x00010000,
1092         0x3c074, 0xffffffff, 0x00030002,
1093         0x3c078, 0xffffffff, 0x00040007,
1094         0x3c07c, 0xffffffff, 0x00060005,
1095         0x3c080, 0xffffffff, 0x00090008,
1096         0x3c084, 0xffffffff, 0x00010000,
1097         0x3c088, 0xffffffff, 0x00030002,
1098         0x3c08c, 0xffffffff, 0x00040007,
1099         0x3c090, 0xffffffff, 0x00060005,
1100         0x3c094, 0xffffffff, 0x00090008,
1101         0x3c098, 0xffffffff, 0x00010000,
1102         0x3c09c, 0xffffffff, 0x00030002,
1103         0x3c0a0, 0xffffffff, 0x00040007,
1104         0x3c0a4, 0xffffffff, 0x00060005,
1105         0x3c0a8, 0xffffffff, 0x00090008,
1106         0x3c000, 0xffffffff, 0x96e00200,
1107         0x8708, 0xffffffff, 0x00900100,
1108         0xc424, 0xffffffff, 0x0020003f,
1109         0x38, 0xffffffff, 0x0140001c,
1110         0x3c, 0x000f0000, 0x000f0000,
1111         0x220, 0xffffffff, 0xC060000C,
1112         0x224, 0xc0000fff, 0x00000100,
1113         0xf90, 0xffffffff, 0x00000100,
1114         0xf98, 0x00000101, 0x00000000,
1115         0x20a8, 0xffffffff, 0x00000104,
1116         0x55e4, 0xff000fff, 0x00000100,
1117         0x30cc, 0xc0000fff, 0x00000104,
1118         0xc1e4, 0x00000001, 0x00000001,
1119         0xd00c, 0xff000ff0, 0x00000100,
1120         0xd80c, 0xff000ff0, 0x00000100
1121 };
1122
1123 static const u32 spectre_golden_spm_registers[] =
1124 {
1125         0x30800, 0xe0ffffff, 0xe0000000
1126 };
1127
1128 static const u32 spectre_golden_common_registers[] =
1129 {
1130         0xc770, 0xffffffff, 0x00000800,
1131         0xc774, 0xffffffff, 0x00000800,
1132         0xc798, 0xffffffff, 0x00007fbf,
1133         0xc79c, 0xffffffff, 0x00007faf
1134 };
1135
1136 static const u32 spectre_golden_registers[] =
1137 {
1138         0x3c000, 0xffff1fff, 0x96940200,
1139         0x3c00c, 0xffff0001, 0xff000000,
1140         0x3c200, 0xfffc0fff, 0x00000100,
1141         0x6ed8, 0x00010101, 0x00010000,
1142         0x9834, 0xf00fffff, 0x00000400,
1143         0x9838, 0xfffffffc, 0x00020200,
1144         0x5bb0, 0x000000f0, 0x00000070,
1145         0x5bc0, 0xf0311fff, 0x80300000,
1146         0x98f8, 0x73773777, 0x12010001,
1147         0x9b7c, 0x00ff0000, 0x00fc0000,
1148         0x2f48, 0x73773777, 0x12010001,
1149         0x8a14, 0xf000003f, 0x00000007,
1150         0x8b24, 0xffffffff, 0x00ffffff,
1151         0x28350, 0x3f3f3fff, 0x00000082,
1152         0x28354, 0x0000003f, 0x00000000,
1153         0x3e78, 0x00000001, 0x00000002,
1154         0x913c, 0xffff03df, 0x00000004,
1155         0xc768, 0x00000008, 0x00000008,
1156         0x8c00, 0x000008ff, 0x00000800,
1157         0x9508, 0x00010000, 0x00010000,
1158         0xac0c, 0xffffffff, 0x54763210,
1159         0x214f8, 0x01ff01ff, 0x00000002,
1160         0x21498, 0x007ff800, 0x00200000,
1161         0x2015c, 0xffffffff, 0x00000f40,
1162         0x30934, 0xffffffff, 0x00000001
1163 };
1164
1165 static const u32 spectre_mgcg_cgcg_init[] =
1166 {
1167         0xc420, 0xffffffff, 0xfffffffc,
1168         0x30800, 0xffffffff, 0xe0000000,
1169         0x3c2a0, 0xffffffff, 0x00000100,
1170         0x3c208, 0xffffffff, 0x00000100,
1171         0x3c2c0, 0xffffffff, 0x00000100,
1172         0x3c2c8, 0xffffffff, 0x00000100,
1173         0x3c2c4, 0xffffffff, 0x00000100,
1174         0x55e4, 0xffffffff, 0x00600100,
1175         0x3c280, 0xffffffff, 0x00000100,
1176         0x3c214, 0xffffffff, 0x06000100,
1177         0x3c220, 0xffffffff, 0x00000100,
1178         0x3c218, 0xffffffff, 0x06000100,
1179         0x3c204, 0xffffffff, 0x00000100,
1180         0x3c2e0, 0xffffffff, 0x00000100,
1181         0x3c224, 0xffffffff, 0x00000100,
1182         0x3c200, 0xffffffff, 0x00000100,
1183         0x3c230, 0xffffffff, 0x00000100,
1184         0x3c234, 0xffffffff, 0x00000100,
1185         0x3c250, 0xffffffff, 0x00000100,
1186         0x3c254, 0xffffffff, 0x00000100,
1187         0x3c258, 0xffffffff, 0x00000100,
1188         0x3c25c, 0xffffffff, 0x00000100,
1189         0x3c260, 0xffffffff, 0x00000100,
1190         0x3c27c, 0xffffffff, 0x00000100,
1191         0x3c278, 0xffffffff, 0x00000100,
1192         0x3c210, 0xffffffff, 0x06000100,
1193         0x3c290, 0xffffffff, 0x00000100,
1194         0x3c274, 0xffffffff, 0x00000100,
1195         0x3c2b4, 0xffffffff, 0x00000100,
1196         0x3c2b0, 0xffffffff, 0x00000100,
1197         0x3c270, 0xffffffff, 0x00000100,
1198         0x30800, 0xffffffff, 0xe0000000,
1199         0x3c020, 0xffffffff, 0x00010000,
1200         0x3c024, 0xffffffff, 0x00030002,
1201         0x3c028, 0xffffffff, 0x00040007,
1202         0x3c02c, 0xffffffff, 0x00060005,
1203         0x3c030, 0xffffffff, 0x00090008,
1204         0x3c034, 0xffffffff, 0x00010000,
1205         0x3c038, 0xffffffff, 0x00030002,
1206         0x3c03c, 0xffffffff, 0x00040007,
1207         0x3c040, 0xffffffff, 0x00060005,
1208         0x3c044, 0xffffffff, 0x00090008,
1209         0x3c048, 0xffffffff, 0x00010000,
1210         0x3c04c, 0xffffffff, 0x00030002,
1211         0x3c050, 0xffffffff, 0x00040007,
1212         0x3c054, 0xffffffff, 0x00060005,
1213         0x3c058, 0xffffffff, 0x00090008,
1214         0x3c05c, 0xffffffff, 0x00010000,
1215         0x3c060, 0xffffffff, 0x00030002,
1216         0x3c064, 0xffffffff, 0x00040007,
1217         0x3c068, 0xffffffff, 0x00060005,
1218         0x3c06c, 0xffffffff, 0x00090008,
1219         0x3c070, 0xffffffff, 0x00010000,
1220         0x3c074, 0xffffffff, 0x00030002,
1221         0x3c078, 0xffffffff, 0x00040007,
1222         0x3c07c, 0xffffffff, 0x00060005,
1223         0x3c080, 0xffffffff, 0x00090008,
1224         0x3c084, 0xffffffff, 0x00010000,
1225         0x3c088, 0xffffffff, 0x00030002,
1226         0x3c08c, 0xffffffff, 0x00040007,
1227         0x3c090, 0xffffffff, 0x00060005,
1228         0x3c094, 0xffffffff, 0x00090008,
1229         0x3c098, 0xffffffff, 0x00010000,
1230         0x3c09c, 0xffffffff, 0x00030002,
1231         0x3c0a0, 0xffffffff, 0x00040007,
1232         0x3c0a4, 0xffffffff, 0x00060005,
1233         0x3c0a8, 0xffffffff, 0x00090008,
1234         0x3c0ac, 0xffffffff, 0x00010000,
1235         0x3c0b0, 0xffffffff, 0x00030002,
1236         0x3c0b4, 0xffffffff, 0x00040007,
1237         0x3c0b8, 0xffffffff, 0x00060005,
1238         0x3c0bc, 0xffffffff, 0x00090008,
1239         0x3c000, 0xffffffff, 0x96e00200,
1240         0x8708, 0xffffffff, 0x00900100,
1241         0xc424, 0xffffffff, 0x0020003f,
1242         0x38, 0xffffffff, 0x0140001c,
1243         0x3c, 0x000f0000, 0x000f0000,
1244         0x220, 0xffffffff, 0xC060000C,
1245         0x224, 0xc0000fff, 0x00000100,
1246         0xf90, 0xffffffff, 0x00000100,
1247         0xf98, 0x00000101, 0x00000000,
1248         0x20a8, 0xffffffff, 0x00000104,
1249         0x55e4, 0xff000fff, 0x00000100,
1250         0x30cc, 0xc0000fff, 0x00000104,
1251         0xc1e4, 0x00000001, 0x00000001,
1252         0xd00c, 0xff000ff0, 0x00000100,
1253         0xd80c, 0xff000ff0, 0x00000100
1254 };
1255
1256 static const u32 kalindi_golden_spm_registers[] =
1257 {
1258         0x30800, 0xe0ffffff, 0xe0000000
1259 };
1260
1261 static const u32 kalindi_golden_common_registers[] =
1262 {
1263         0xc770, 0xffffffff, 0x00000800,
1264         0xc774, 0xffffffff, 0x00000800,
1265         0xc798, 0xffffffff, 0x00007fbf,
1266         0xc79c, 0xffffffff, 0x00007faf
1267 };
1268
1269 static const u32 kalindi_golden_registers[] =
1270 {
1271         0x3c000, 0xffffdfff, 0x6e944040,
1272         0x55e4, 0xff607fff, 0xfc000100,
1273         0x3c220, 0xff000fff, 0x00000100,
1274         0x3c224, 0xff000fff, 0x00000100,
1275         0x3c200, 0xfffc0fff, 0x00000100,
1276         0x6ed8, 0x00010101, 0x00010000,
1277         0x9830, 0xffffffff, 0x00000000,
1278         0x9834, 0xf00fffff, 0x00000400,
1279         0x5bb0, 0x000000f0, 0x00000070,
1280         0x5bc0, 0xf0311fff, 0x80300000,
1281         0x98f8, 0x73773777, 0x12010001,
1282         0x98fc, 0xffffffff, 0x00000010,
1283         0x9b7c, 0x00ff0000, 0x00fc0000,
1284         0x8030, 0x00001f0f, 0x0000100a,
1285         0x2f48, 0x73773777, 0x12010001,
1286         0x2408, 0x000fffff, 0x000c007f,
1287         0x8a14, 0xf000003f, 0x00000007,
1288         0x8b24, 0x3fff3fff, 0x00ffcfff,
1289         0x30a04, 0x0000ff0f, 0x00000000,
1290         0x28a4c, 0x07ffffff, 0x06000000,
1291         0x4d8, 0x00000fff, 0x00000100,
1292         0x3e78, 0x00000001, 0x00000002,
1293         0xc768, 0x00000008, 0x00000008,
1294         0x8c00, 0x000000ff, 0x00000003,
1295         0x214f8, 0x01ff01ff, 0x00000002,
1296         0x21498, 0x007ff800, 0x00200000,
1297         0x2015c, 0xffffffff, 0x00000f40,
1298         0x88c4, 0x001f3ae3, 0x00000082,
1299         0x88d4, 0x0000001f, 0x00000010,
1300         0x30934, 0xffffffff, 0x00000000
1301 };
1302
1303 static const u32 kalindi_mgcg_cgcg_init[] =
1304 {
1305         0xc420, 0xffffffff, 0xfffffffc,
1306         0x30800, 0xffffffff, 0xe0000000,
1307         0x3c2a0, 0xffffffff, 0x00000100,
1308         0x3c208, 0xffffffff, 0x00000100,
1309         0x3c2c0, 0xffffffff, 0x00000100,
1310         0x3c2c8, 0xffffffff, 0x00000100,
1311         0x3c2c4, 0xffffffff, 0x00000100,
1312         0x55e4, 0xffffffff, 0x00600100,
1313         0x3c280, 0xffffffff, 0x00000100,
1314         0x3c214, 0xffffffff, 0x06000100,
1315         0x3c220, 0xffffffff, 0x00000100,
1316         0x3c218, 0xffffffff, 0x06000100,
1317         0x3c204, 0xffffffff, 0x00000100,
1318         0x3c2e0, 0xffffffff, 0x00000100,
1319         0x3c224, 0xffffffff, 0x00000100,
1320         0x3c200, 0xffffffff, 0x00000100,
1321         0x3c230, 0xffffffff, 0x00000100,
1322         0x3c234, 0xffffffff, 0x00000100,
1323         0x3c250, 0xffffffff, 0x00000100,
1324         0x3c254, 0xffffffff, 0x00000100,
1325         0x3c258, 0xffffffff, 0x00000100,
1326         0x3c25c, 0xffffffff, 0x00000100,
1327         0x3c260, 0xffffffff, 0x00000100,
1328         0x3c27c, 0xffffffff, 0x00000100,
1329         0x3c278, 0xffffffff, 0x00000100,
1330         0x3c210, 0xffffffff, 0x06000100,
1331         0x3c290, 0xffffffff, 0x00000100,
1332         0x3c274, 0xffffffff, 0x00000100,
1333         0x3c2b4, 0xffffffff, 0x00000100,
1334         0x3c2b0, 0xffffffff, 0x00000100,
1335         0x3c270, 0xffffffff, 0x00000100,
1336         0x30800, 0xffffffff, 0xe0000000,
1337         0x3c020, 0xffffffff, 0x00010000,
1338         0x3c024, 0xffffffff, 0x00030002,
1339         0x3c028, 0xffffffff, 0x00040007,
1340         0x3c02c, 0xffffffff, 0x00060005,
1341         0x3c030, 0xffffffff, 0x00090008,
1342         0x3c034, 0xffffffff, 0x00010000,
1343         0x3c038, 0xffffffff, 0x00030002,
1344         0x3c03c, 0xffffffff, 0x00040007,
1345         0x3c040, 0xffffffff, 0x00060005,
1346         0x3c044, 0xffffffff, 0x00090008,
1347         0x3c000, 0xffffffff, 0x96e00200,
1348         0x8708, 0xffffffff, 0x00900100,
1349         0xc424, 0xffffffff, 0x0020003f,
1350         0x38, 0xffffffff, 0x0140001c,
1351         0x3c, 0x000f0000, 0x000f0000,
1352         0x220, 0xffffffff, 0xC060000C,
1353         0x224, 0xc0000fff, 0x00000100,
1354         0x20a8, 0xffffffff, 0x00000104,
1355         0x55e4, 0xff000fff, 0x00000100,
1356         0x30cc, 0xc0000fff, 0x00000104,
1357         0xc1e4, 0x00000001, 0x00000001,
1358         0xd00c, 0xff000ff0, 0x00000100,
1359         0xd80c, 0xff000ff0, 0x00000100
1360 };
1361
1362 static const u32 hawaii_golden_spm_registers[] =
1363 {
1364         0x30800, 0xe0ffffff, 0xe0000000
1365 };
1366
1367 static const u32 hawaii_golden_common_registers[] =
1368 {
1369         0x30800, 0xffffffff, 0xe0000000,
1370         0x28350, 0xffffffff, 0x3a00161a,
1371         0x28354, 0xffffffff, 0x0000002e,
1372         0x9a10, 0xffffffff, 0x00018208,
1373         0x98f8, 0xffffffff, 0x12011003
1374 };
1375
1376 static const u32 hawaii_golden_registers[] =
1377 {
1378         0x3354, 0x00000333, 0x00000333,
1379         0x9a10, 0x00010000, 0x00058208,
1380         0x9830, 0xffffffff, 0x00000000,
1381         0x9834, 0xf00fffff, 0x00000400,
1382         0x9838, 0x0002021c, 0x00020200,
1383         0xc78, 0x00000080, 0x00000000,
1384         0x5bb0, 0x000000f0, 0x00000070,
1385         0x5bc0, 0xf0311fff, 0x80300000,
1386         0x350c, 0x00810000, 0x408af000,
1387         0x7030, 0x31000111, 0x00000011,
1388         0x2f48, 0x73773777, 0x12010001,
1389         0x2120, 0x0000007f, 0x0000001b,
1390         0x21dc, 0x00007fb6, 0x00002191,
1391         0x3628, 0x0000003f, 0x0000000a,
1392         0x362c, 0x0000003f, 0x0000000a,
1393         0x2ae4, 0x00073ffe, 0x000022a2,
1394         0x240c, 0x000007ff, 0x00000000,
1395         0x8bf0, 0x00002001, 0x00000001,
1396         0x8b24, 0xffffffff, 0x00ffffff,
1397         0x30a04, 0x0000ff0f, 0x00000000,
1398         0x28a4c, 0x07ffffff, 0x06000000,
1399         0x3e78, 0x00000001, 0x00000002,
1400         0xc768, 0x00000008, 0x00000008,
1401         0xc770, 0x00000f00, 0x00000800,
1402         0xc774, 0x00000f00, 0x00000800,
1403         0xc798, 0x00ffffff, 0x00ff7fbf,
1404         0xc79c, 0x00ffffff, 0x00ff7faf,
1405         0x8c00, 0x000000ff, 0x00000800,
1406         0xe40, 0x00001fff, 0x00001fff,
1407         0x9060, 0x0000007f, 0x00000020,
1408         0x9508, 0x00010000, 0x00010000,
1409         0xae00, 0x00100000, 0x000ff07c,
1410         0xac14, 0x000003ff, 0x0000000f,
1411         0xac10, 0xffffffff, 0x7564fdec,
1412         0xac0c, 0xffffffff, 0x3120b9a8,
1413         0xac08, 0x20000000, 0x0f9c0000
1414 };
1415
1416 static const u32 hawaii_mgcg_cgcg_init[] =
1417 {
1418         0xc420, 0xffffffff, 0xfffffffd,
1419         0x30800, 0xffffffff, 0xe0000000,
1420         0x3c2a0, 0xffffffff, 0x00000100,
1421         0x3c208, 0xffffffff, 0x00000100,
1422         0x3c2c0, 0xffffffff, 0x00000100,
1423         0x3c2c8, 0xffffffff, 0x00000100,
1424         0x3c2c4, 0xffffffff, 0x00000100,
1425         0x55e4, 0xffffffff, 0x00200100,
1426         0x3c280, 0xffffffff, 0x00000100,
1427         0x3c214, 0xffffffff, 0x06000100,
1428         0x3c220, 0xffffffff, 0x00000100,
1429         0x3c218, 0xffffffff, 0x06000100,
1430         0x3c204, 0xffffffff, 0x00000100,
1431         0x3c2e0, 0xffffffff, 0x00000100,
1432         0x3c224, 0xffffffff, 0x00000100,
1433         0x3c200, 0xffffffff, 0x00000100,
1434         0x3c230, 0xffffffff, 0x00000100,
1435         0x3c234, 0xffffffff, 0x00000100,
1436         0x3c250, 0xffffffff, 0x00000100,
1437         0x3c254, 0xffffffff, 0x00000100,
1438         0x3c258, 0xffffffff, 0x00000100,
1439         0x3c25c, 0xffffffff, 0x00000100,
1440         0x3c260, 0xffffffff, 0x00000100,
1441         0x3c27c, 0xffffffff, 0x00000100,
1442         0x3c278, 0xffffffff, 0x00000100,
1443         0x3c210, 0xffffffff, 0x06000100,
1444         0x3c290, 0xffffffff, 0x00000100,
1445         0x3c274, 0xffffffff, 0x00000100,
1446         0x3c2b4, 0xffffffff, 0x00000100,
1447         0x3c2b0, 0xffffffff, 0x00000100,
1448         0x3c270, 0xffffffff, 0x00000100,
1449         0x30800, 0xffffffff, 0xe0000000,
1450         0x3c020, 0xffffffff, 0x00010000,
1451         0x3c024, 0xffffffff, 0x00030002,
1452         0x3c028, 0xffffffff, 0x00040007,
1453         0x3c02c, 0xffffffff, 0x00060005,
1454         0x3c030, 0xffffffff, 0x00090008,
1455         0x3c034, 0xffffffff, 0x00010000,
1456         0x3c038, 0xffffffff, 0x00030002,
1457         0x3c03c, 0xffffffff, 0x00040007,
1458         0x3c040, 0xffffffff, 0x00060005,
1459         0x3c044, 0xffffffff, 0x00090008,
1460         0x3c048, 0xffffffff, 0x00010000,
1461         0x3c04c, 0xffffffff, 0x00030002,
1462         0x3c050, 0xffffffff, 0x00040007,
1463         0x3c054, 0xffffffff, 0x00060005,
1464         0x3c058, 0xffffffff, 0x00090008,
1465         0x3c05c, 0xffffffff, 0x00010000,
1466         0x3c060, 0xffffffff, 0x00030002,
1467         0x3c064, 0xffffffff, 0x00040007,
1468         0x3c068, 0xffffffff, 0x00060005,
1469         0x3c06c, 0xffffffff, 0x00090008,
1470         0x3c070, 0xffffffff, 0x00010000,
1471         0x3c074, 0xffffffff, 0x00030002,
1472         0x3c078, 0xffffffff, 0x00040007,
1473         0x3c07c, 0xffffffff, 0x00060005,
1474         0x3c080, 0xffffffff, 0x00090008,
1475         0x3c084, 0xffffffff, 0x00010000,
1476         0x3c088, 0xffffffff, 0x00030002,
1477         0x3c08c, 0xffffffff, 0x00040007,
1478         0x3c090, 0xffffffff, 0x00060005,
1479         0x3c094, 0xffffffff, 0x00090008,
1480         0x3c098, 0xffffffff, 0x00010000,
1481         0x3c09c, 0xffffffff, 0x00030002,
1482         0x3c0a0, 0xffffffff, 0x00040007,
1483         0x3c0a4, 0xffffffff, 0x00060005,
1484         0x3c0a8, 0xffffffff, 0x00090008,
1485         0x3c0ac, 0xffffffff, 0x00010000,
1486         0x3c0b0, 0xffffffff, 0x00030002,
1487         0x3c0b4, 0xffffffff, 0x00040007,
1488         0x3c0b8, 0xffffffff, 0x00060005,
1489         0x3c0bc, 0xffffffff, 0x00090008,
1490         0x3c0c0, 0xffffffff, 0x00010000,
1491         0x3c0c4, 0xffffffff, 0x00030002,
1492         0x3c0c8, 0xffffffff, 0x00040007,
1493         0x3c0cc, 0xffffffff, 0x00060005,
1494         0x3c0d0, 0xffffffff, 0x00090008,
1495         0x3c0d4, 0xffffffff, 0x00010000,
1496         0x3c0d8, 0xffffffff, 0x00030002,
1497         0x3c0dc, 0xffffffff, 0x00040007,
1498         0x3c0e0, 0xffffffff, 0x00060005,
1499         0x3c0e4, 0xffffffff, 0x00090008,
1500         0x3c0e8, 0xffffffff, 0x00010000,
1501         0x3c0ec, 0xffffffff, 0x00030002,
1502         0x3c0f0, 0xffffffff, 0x00040007,
1503         0x3c0f4, 0xffffffff, 0x00060005,
1504         0x3c0f8, 0xffffffff, 0x00090008,
1505         0xc318, 0xffffffff, 0x00020200,
1506         0x3350, 0xffffffff, 0x00000200,
1507         0x15c0, 0xffffffff, 0x00000400,
1508         0x55e8, 0xffffffff, 0x00000000,
1509         0x2f50, 0xffffffff, 0x00000902,
1510         0x3c000, 0xffffffff, 0x96940200,
1511         0x8708, 0xffffffff, 0x00900100,
1512         0xc424, 0xffffffff, 0x0020003f,
1513         0x38, 0xffffffff, 0x0140001c,
1514         0x3c, 0x000f0000, 0x000f0000,
1515         0x220, 0xffffffff, 0xc060000c,
1516         0x224, 0xc0000fff, 0x00000100,
1517         0xf90, 0xffffffff, 0x00000100,
1518         0xf98, 0x00000101, 0x00000000,
1519         0x20a8, 0xffffffff, 0x00000104,
1520         0x55e4, 0xff000fff, 0x00000100,
1521         0x30cc, 0xc0000fff, 0x00000104,
1522         0xc1e4, 0x00000001, 0x00000001,
1523         0xd00c, 0xff000ff0, 0x00000100,
1524         0xd80c, 0xff000ff0, 0x00000100
1525 };
1526
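/*
 * Golden register tables are flat arrays of {offset, mask, value}
 * triples consumed by radeon_program_register_sequence(), which does
 * roughly:
 *   WREG32(reg, mask == 0xffffffff ? val : (RREG32(reg) & ~mask) | val);
 */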
1527 static const u32 godavari_golden_registers[] =
1528 {
1529         0x55e4, 0xff607fff, 0xfc000100,
1530         0x6ed8, 0x00010101, 0x00010000,
1531         0x9830, 0xffffffff, 0x00000000,
1532         0x98302, 0xf00fffff, 0x00000400,
1533         0x6130, 0xffffffff, 0x00010000,
1534         0x5bb0, 0x000000f0, 0x00000070,
1535         0x5bc0, 0xf0311fff, 0x80300000,
1536         0x98f8, 0x73773777, 0x12010001,
1537         0x98fc, 0xffffffff, 0x00000010,
1538         0x8030, 0x00001f0f, 0x0000100a,
1539         0x2f48, 0x73773777, 0x12010001,
1540         0x2408, 0x000fffff, 0x000c007f,
1541         0x8a14, 0xf000003f, 0x00000007,
1542         0x8b24, 0xffffffff, 0x00ff0fff,
1543         0x30a04, 0x0000ff0f, 0x00000000,
1544         0x28a4c, 0x07ffffff, 0x06000000,
1545         0x4d8, 0x00000fff, 0x00000100,
1546         0xd014, 0x00010000, 0x00810001,
1547         0xd814, 0x00010000, 0x00810001,
1548         0x3e78, 0x00000001, 0x00000002,
1549         0xc768, 0x00000008, 0x00000008,
1550         0xc770, 0x00000f00, 0x00000800,
1551         0xc774, 0x00000f00, 0x00000800,
1552         0xc798, 0x00ffffff, 0x00ff7fbf,
1553         0xc79c, 0x00ffffff, 0x00ff7faf,
1554         0x8c00, 0x000000ff, 0x00000001,
1555         0x214f8, 0x01ff01ff, 0x00000002,
1556         0x21498, 0x007ff800, 0x00200000,
1557         0x2015c, 0xffffffff, 0x00000f40,
1558         0x88c4, 0x001f3ae3, 0x00000082,
1559         0x88d4, 0x0000001f, 0x00000010,
1560         0x30934, 0xffffffff, 0x00000000
1561 };
1562
1563
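/**
 * cik_init_golden_registers - program the golden register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-asic "golden" register sequences (clock gating init
 * plus recommended register defaults) for the detected CIK family.
 */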
1564 static void cik_init_golden_registers(struct radeon_device *rdev)
1565 {
1566         switch (rdev->family) {
1567         case CHIP_BONAIRE:
1568                 radeon_program_register_sequence(rdev,
1569                                                  bonaire_mgcg_cgcg_init,
1570                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1571                 radeon_program_register_sequence(rdev,
1572                                                  bonaire_golden_registers,
1573                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1574                 radeon_program_register_sequence(rdev,
1575                                                  bonaire_golden_common_registers,
1576                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1577                 radeon_program_register_sequence(rdev,
1578                                                  bonaire_golden_spm_registers,
1579                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1580                 break;
1581         case CHIP_KABINI:
1582                 radeon_program_register_sequence(rdev,
1583                                                  kalindi_mgcg_cgcg_init,
1584                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1585                 radeon_program_register_sequence(rdev,
1586                                                  kalindi_golden_registers,
1587                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1588                 radeon_program_register_sequence(rdev,
1589                                                  kalindi_golden_common_registers,
1590                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1591                 radeon_program_register_sequence(rdev,
1592                                                  kalindi_golden_spm_registers,
1593                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1594                 break;
1595         case CHIP_MULLINS:
1596                 radeon_program_register_sequence(rdev,
1597                                                  kalindi_mgcg_cgcg_init,
1598                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1599                 radeon_program_register_sequence(rdev,
1600                                                  godavari_golden_registers,
1601                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1602                 radeon_program_register_sequence(rdev,
1603                                                  kalindi_golden_common_registers,
1604                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1605                 radeon_program_register_sequence(rdev,
1606                                                  kalindi_golden_spm_registers,
1607                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1608                 break;
1609         case CHIP_KAVERI:
1610                 radeon_program_register_sequence(rdev,
1611                                                  spectre_mgcg_cgcg_init,
1612                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1613                 radeon_program_register_sequence(rdev,
1614                                                  spectre_golden_registers,
1615                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1616                 radeon_program_register_sequence(rdev,
1617                                                  spectre_golden_common_registers,
1618                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1619                 radeon_program_register_sequence(rdev,
1620                                                  spectre_golden_spm_registers,
1621                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1622                 break;
1623         case CHIP_HAWAII:
1624                 radeon_program_register_sequence(rdev,
1625                                                  hawaii_mgcg_cgcg_init,
1626                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1627                 radeon_program_register_sequence(rdev,
1628                                                  hawaii_golden_registers,
1629                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1630                 radeon_program_register_sequence(rdev,
1631                                                  hawaii_golden_common_registers,
1632                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1633                 radeon_program_register_sequence(rdev,
1634                                                  hawaii_golden_spm_registers,
1635                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1636                 break;
1637         default:
1638                 break;
1639         }
1640 }
1641
1642 /**
1643  * cik_get_xclk - get the xclk
1644  *
1645  * @rdev: radeon_device pointer
1646  *
1647  * Returns the reference clock used by the gfx engine
1648  * (CIK).
1649  */
1650 u32 cik_get_xclk(struct radeon_device *rdev)
1651 {
1652         u32 reference_clock = rdev->clock.spll.reference_freq;
1653
1654         if (rdev->flags & RADEON_IS_IGP) {
1655                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1656                         return reference_clock / 2;
1657         } else {
1658                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1659                         return reference_clock / 4;
1660         }
1661         return reference_clock;
1662 }
1663
1664 /**
1665  * cik_mm_rdoorbell - read a doorbell dword
1666  *
1667  * @rdev: radeon_device pointer
1668  * @index: doorbell index
1669  *
1670  * Returns the value in the doorbell aperture at the
1671  * requested doorbell index (CIK).
1672  */
1673 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1674 {
1675         if (index < rdev->doorbell.num_doorbells) {
1676                 return readl(rdev->doorbell.ptr + index);
1677         } else {
1678                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1679                 return 0;
1680         }
1681 }
1682
1683 /**
1684  * cik_mm_wdoorbell - write a doorbell dword
1685  *
1686  * @rdev: radeon_device pointer
1687  * @index: doorbell index
1688  * @v: value to write
1689  *
1690  * Writes @v to the doorbell aperture at the
1691  * requested doorbell index (CIK).
1692  */
1693 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1694 {
1695         if (index < rdev->doorbell.num_doorbells) {
1696                 writel(v, rdev->doorbell.ptr + index);
1697         } else {
1698                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1699         }
1700 }
1701
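/*
 * Legacy MC firmware: {MC_SEQ_IO_DEBUG index, data} pairs that are
 * programmed before the MC ucode itself in ci_mc_load_microcode().
 */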
1702 #define BONAIRE_IO_MC_REGS_SIZE 36
1703
1704 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1705 {
1706         {0x00000070, 0x04400000},
1707         {0x00000071, 0x80c01803},
1708         {0x00000072, 0x00004004},
1709         {0x00000073, 0x00000100},
1710         {0x00000074, 0x00ff0000},
1711         {0x00000075, 0x34000000},
1712         {0x00000076, 0x08000014},
1713         {0x00000077, 0x00cc08ec},
1714         {0x00000078, 0x00000400},
1715         {0x00000079, 0x00000000},
1716         {0x0000007a, 0x04090000},
1717         {0x0000007c, 0x00000000},
1718         {0x0000007e, 0x4408a8e8},
1719         {0x0000007f, 0x00000304},
1720         {0x00000080, 0x00000000},
1721         {0x00000082, 0x00000001},
1722         {0x00000083, 0x00000002},
1723         {0x00000084, 0xf3e4f400},
1724         {0x00000085, 0x052024e3},
1725         {0x00000087, 0x00000000},
1726         {0x00000088, 0x01000000},
1727         {0x0000008a, 0x1c0a0000},
1728         {0x0000008b, 0xff010000},
1729         {0x0000008d, 0xffffefff},
1730         {0x0000008e, 0xfff3efff},
1731         {0x0000008f, 0xfff3efbf},
1732         {0x00000092, 0xf7ffffff},
1733         {0x00000093, 0xffffff7f},
1734         {0x00000095, 0x00101101},
1735         {0x00000096, 0x00000fff},
1736         {0x00000097, 0x00116fff},
1737         {0x00000098, 0x60010000},
1738         {0x00000099, 0x10010000},
1739         {0x0000009a, 0x00006000},
1740         {0x0000009b, 0x00001000},
1741         {0x0000009f, 0x00b48000}
1742 };
1743
1744 #define HAWAII_IO_MC_REGS_SIZE 22
1745
1746 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1747 {
1748         {0x0000007d, 0x40000000},
1749         {0x0000007e, 0x40180304},
1750         {0x0000007f, 0x0000ff00},
1751         {0x00000081, 0x00000000},
1752         {0x00000083, 0x00000800},
1753         {0x00000086, 0x00000000},
1754         {0x00000087, 0x00000100},
1755         {0x00000088, 0x00020100},
1756         {0x00000089, 0x00000000},
1757         {0x0000008b, 0x00040000},
1758         {0x0000008c, 0x00000100},
1759         {0x0000008e, 0xff010000},
1760         {0x00000090, 0xffffefff},
1761         {0x00000091, 0xfff3efff},
1762         {0x00000092, 0xfff3efbf},
1763         {0x00000093, 0xf7ffffff},
1764         {0x00000094, 0xffffff7f},
1765         {0x00000095, 0x00000fff},
1766         {0x00000096, 0x00116fff},
1767         {0x00000097, 0x60010000},
1768         {0x00000098, 0x10010000},
1769         {0x0000009f, 0x00c79000}
1770 };
1771
1772
1773 /**
1774  * cik_srbm_select - select specific register instances
1775  *
1776  * @rdev: radeon_device pointer
1777  * @me: selected ME (micro engine)
1778  * @pipe: pipe
1779  * @queue: queue
1780  * @vmid: VMID
1781  *
1782  * Switches the currently active register instances.  Some
1783  * registers are instanced per VMID, others are instanced per
1784  * me/pipe/queue combination.
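 * Callers are expected to serialize access with rdev->srbm_mutex.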
1785  */
1786 static void cik_srbm_select(struct radeon_device *rdev,
1787                             u32 me, u32 pipe, u32 queue, u32 vmid)
1788 {
1789         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1790                              MEID(me & 0x3) |
1791                              VMID(vmid & 0xf) |
1792                              QUEUEID(queue & 0x7));
1793         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1794 }
1795
1796 /* ucode loading */
1797 /**
1798  * ci_mc_load_microcode - load MC ucode into the hw
1799  *
1800  * @rdev: radeon_device pointer
1801  *
1802  * Load the GDDR MC ucode into the hw (CIK).
1803  * Returns 0 on success, error on failure.
1804  */
1805 int ci_mc_load_microcode(struct radeon_device *rdev)
1806 {
1807         const __be32 *fw_data = NULL;
1808         const __le32 *new_fw_data = NULL;
1809         u32 running, blackout = 0;
1810         u32 *io_mc_regs = NULL;
1811         const __le32 *new_io_mc_regs = NULL;
1812         int i, regs_size, ucode_size;
1813
1814         if (!rdev->mc_fw)
1815                 return -EINVAL;
1816
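        /* new-style firmware carries a header describing the io-debug
         * table and ucode sizes; legacy blobs rely on the hard-coded
         * tables above
         */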
1817         if (rdev->new_fw) {
1818                 const struct mc_firmware_header_v1_0 *hdr =
1819                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1820
1821                 radeon_ucode_print_mc_hdr(&hdr->header);
1822
1823                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1824                 new_io_mc_regs = (const __le32 *)
1825                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1826                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1827                 new_fw_data = (const __le32 *)
1828                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1829         } else {
1830                 ucode_size = rdev->mc_fw->size / 4;
1831
1832                 switch (rdev->family) {
1833                 case CHIP_BONAIRE:
1834                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1835                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1836                         break;
1837                 case CHIP_HAWAII:
1838                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1839                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1840                         break;
1841                 default:
1842                         return -EINVAL;
1843                 }
1844                 fw_data = (const __be32 *)rdev->mc_fw->data;
1845         }
1846
1847         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1848
1849         if (running == 0) {
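                /* running is known to be zero here, so this blackout
                 * save (and the matching restore below) never triggers
                 */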
1850                 if (running) {
1851                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1852                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1853                 }
1854
1855                 /* reset the engine and set to writable */
1856                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1857                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1858
1859                 /* load mc io regs */
1860                 for (i = 0; i < regs_size; i++) {
1861                         if (rdev->new_fw) {
1862                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1863                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1864                         } else {
1865                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1866                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1867                         }
1868                 }
1869                 /* load the MC ucode */
1870                 for (i = 0; i < ucode_size; i++) {
1871                         if (rdev->new_fw)
1872                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1873                         else
1874                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1875                 }
1876
1877                 /* put the engine back into the active state */
1878                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1879                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1880                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1881
1882                 /* wait for training to complete */
1883                 for (i = 0; i < rdev->usec_timeout; i++) {
1884                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1885                                 break;
1886                         udelay(1);
1887                 }
1888                 for (i = 0; i < rdev->usec_timeout; i++) {
1889                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1890                                 break;
1891                         udelay(1);
1892                 }
1893
1894                 if (running)
1895                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1896         }
1897
1898         return 0;
1899 }
1900
1901 /**
1902  * cik_init_microcode - load ucode images from disk
1903  *
1904  * @rdev: radeon_device pointer
1905  *
1906  * Use the firmware interface to load the ucode images into
1907  * the driver (not loaded into hw).
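 * New-style (lower-case) firmware images are tried first and validated
 * with radeon_ucode_validate(); the legacy upper-case images are used
 * as a fallback and are only checked against the expected size.  Mixing
 * new and legacy images is rejected.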
1908  * Returns 0 on success, error on failure.
1909  */
1910 static int cik_init_microcode(struct radeon_device *rdev)
1911 {
1912         const char *chip_name;
1913         const char *new_chip_name;
1914         size_t pfp_req_size, me_req_size, ce_req_size,
1915                 mec_req_size, rlc_req_size, mc_req_size = 0,
1916                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1917         char fw_name[30];
1918         int new_fw = 0;
1919         int err;
1920         int num_fw;
1921
1922         DRM_DEBUG("\n");
1923
1924         switch (rdev->family) {
1925         case CHIP_BONAIRE:
1926                 chip_name = "BONAIRE";
1927                 new_chip_name = "bonaire";
1928                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1929                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1930                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1931                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1932                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1933                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1934                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1935                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1936                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1937                 num_fw = 8;
1938                 break;
1939         case CHIP_HAWAII:
1940                 chip_name = "HAWAII";
1941                 new_chip_name = "hawaii";
1942                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1943                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1944                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1945                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1946                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1947                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1948                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1949                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1950                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1951                 num_fw = 8;
1952                 break;
1953         case CHIP_KAVERI:
1954                 chip_name = "KAVERI";
1955                 new_chip_name = "kaveri";
1956                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1957                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1958                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1959                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1960                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1961                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1962                 num_fw = 7;
1963                 break;
1964         case CHIP_KABINI:
1965                 chip_name = "KABINI";
1966                 new_chip_name = "kabini";
1967                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1968                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1969                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1970                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1971                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1972                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1973                 num_fw = 6;
1974                 break;
1975         case CHIP_MULLINS:
1976                 chip_name = "MULLINS";
1977                 new_chip_name = "mullins";
1978                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1979                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1980                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1981                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1982                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1983                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1984                 num_fw = 6;
1985                 break;
1986         default: BUG();
1987         }
1988
1989         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1990
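        /* try the new firmware name first, fall back to the legacy name */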
1991         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1992         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1993         if (err) {
1994                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1995                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1996                 if (err)
1997                         goto out;
1998                 if (rdev->pfp_fw->size != pfp_req_size) {
1999                         printk(KERN_ERR
2000                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2001                                rdev->pfp_fw->size, fw_name);
2002                         err = -EINVAL;
2003                         goto out;
2004                 }
2005         } else {
2006                 err = radeon_ucode_validate(rdev->pfp_fw);
2007                 if (err) {
2008                         printk(KERN_ERR
2009                                "cik_fw: validation failed for firmware \"%s\"\n",
2010                                fw_name);
2011                         goto out;
2012                 } else {
2013                         new_fw++;
2014                 }
2015         }
2016
2017         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2018         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2019         if (err) {
2020                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2021                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2022                 if (err)
2023                         goto out;
2024                 if (rdev->me_fw->size != me_req_size) {
2025                         printk(KERN_ERR
2026                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2027                                rdev->me_fw->size, fw_name);
2028                         err = -EINVAL;
2029                 }
2030         } else {
2031                 err = radeon_ucode_validate(rdev->me_fw);
2032                 if (err) {
2033                         printk(KERN_ERR
2034                                "cik_fw: validation failed for firmware \"%s\"\n",
2035                                fw_name);
2036                         goto out;
2037                 } else {
2038                         new_fw++;
2039                 }
2040         }
2041
2042         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2043         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2044         if (err) {
2045                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2046                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2047                 if (err)
2048                         goto out;
2049                 if (rdev->ce_fw->size != ce_req_size) {
2050                         printk(KERN_ERR
2051                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2052                                rdev->ce_fw->size, fw_name);
2053                         err = -EINVAL;
2054                 }
2055         } else {
2056                 err = radeon_ucode_validate(rdev->ce_fw);
2057                 if (err) {
2058                         printk(KERN_ERR
2059                                "cik_fw: validation failed for firmware \"%s\"\n",
2060                                fw_name);
2061                         goto out;
2062                 } else {
2063                         new_fw++;
2064                 }
2065         }
2066
2067         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2068         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2069         if (err) {
2070                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2071                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2072                 if (err)
2073                         goto out;
2074                 if (rdev->mec_fw->size != mec_req_size) {
2075                         printk(KERN_ERR
2076                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2077                                rdev->mec_fw->size, fw_name);
2078                         err = -EINVAL;
2079                 }
2080         } else {
2081                 err = radeon_ucode_validate(rdev->mec_fw);
2082                 if (err) {
2083                         printk(KERN_ERR
2084                                "cik_fw: validation failed for firmware \"%s\"\n",
2085                                fw_name);
2086                         goto out;
2087                 } else {
2088                         new_fw++;
2089                 }
2090         }
2091
2092         if (rdev->family == CHIP_KAVERI) {
2093                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2094                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2095                 if (err) {
2096                         goto out;
2097                 } else {
2098                         err = radeon_ucode_validate(rdev->mec2_fw);
2099                         if (err) {
2100                                 goto out;
2101                         } else {
2102                                 new_fw++;
2103                         }
2104                 }
2105         }
2106
2107         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2108         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2109         if (err) {
2110                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2111                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2112                 if (err)
2113                         goto out;
2114                 if (rdev->rlc_fw->size != rlc_req_size) {
2115                         printk(KERN_ERR
2116                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2117                                rdev->rlc_fw->size, fw_name);
2118                         err = -EINVAL;
2119                 }
2120         } else {
2121                 err = radeon_ucode_validate(rdev->rlc_fw);
2122                 if (err) {
2123                         printk(KERN_ERR
2124                                "cik_fw: validation failed for firmware \"%s\"\n",
2125                                fw_name);
2126                         goto out;
2127                 } else {
2128                         new_fw++;
2129                 }
2130         }
2131
2132         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2133         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2134         if (err) {
2135                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2136                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2137                 if (err)
2138                         goto out;
2139                 if (rdev->sdma_fw->size != sdma_req_size) {
2140                         printk(KERN_ERR
2141                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2142                                rdev->sdma_fw->size, fw_name);
2143                         err = -EINVAL;
2144                 }
2145         } else {
2146                 err = radeon_ucode_validate(rdev->sdma_fw);
2147                 if (err) {
2148                         printk(KERN_ERR
2149                                "cik_fw: validation failed for firmware \"%s\"\n",
2150                                fw_name);
2151                         goto out;
2152                 } else {
2153                         new_fw++;
2154                 }
2155         }
2156
2157         /* No SMC, MC ucode on APUs */
2158         if (!(rdev->flags & RADEON_IS_IGP)) {
2159                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2160                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2161                 if (err) {
2162                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2163                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2164                         if (err) {
2165                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2166                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2167                                 if (err)
2168                                         goto out;
2169                         }
2170                         if ((rdev->mc_fw->size != mc_req_size) &&
2171                             (rdev->mc_fw->size != mc2_req_size)){
2172                                 printk(KERN_ERR
2173                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2174                                        rdev->mc_fw->size, fw_name);
2175                                 err = -EINVAL;
2176                         }
2177                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2178                 } else {
2179                         err = radeon_ucode_validate(rdev->mc_fw);
2180                         if (err) {
2181                                 printk(KERN_ERR
2182                                        "cik_fw: validation failed for firmware \"%s\"\n",
2183                                        fw_name);
2184                                 goto out;
2185                         } else {
2186                                 new_fw++;
2187                         }
2188                 }
2189
2190                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2191                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2192                 if (err) {
2193                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2194                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2195                         if (err) {
2196                                 printk(KERN_ERR
2197                                        "smc: error loading firmware \"%s\"\n",
2198                                        fw_name);
2199                                 release_firmware(rdev->smc_fw);
2200                                 rdev->smc_fw = NULL;
2201                                 err = 0;
2202                         } else if (rdev->smc_fw->size != smc_req_size) {
2203                                 printk(KERN_ERR
2204                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2205                                        rdev->smc_fw->size, fw_name);
2206                                 err = -EINVAL;
2207                         }
2208                 } else {
2209                         err = radeon_ucode_validate(rdev->smc_fw);
2210                         if (err) {
2211                                 printk(KERN_ERR
2212                                        "cik_fw: validation failed for firmware \"%s\"\n",
2213                                        fw_name);
2214                                 goto out;
2215                         } else {
2216                                 new_fw++;
2217                         }
2218                 }
2219         }
2220
2221         if (new_fw == 0) {
2222                 rdev->new_fw = false;
2223         } else if (new_fw < num_fw) {
2224                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2225                 err = -EINVAL;
2226         } else {
2227                 rdev->new_fw = true;
2228         }
2229
2230 out:
2231         if (err) {
2232                 if (err != -EINVAL)
2233                         printk(KERN_ERR
2234                                "cik_cp: Failed to load firmware \"%s\"\n",
2235                                fw_name);
2236                 release_firmware(rdev->pfp_fw);
2237                 rdev->pfp_fw = NULL;
2238                 release_firmware(rdev->me_fw);
2239                 rdev->me_fw = NULL;
2240                 release_firmware(rdev->ce_fw);
2241                 rdev->ce_fw = NULL;
2242                 release_firmware(rdev->mec_fw);
2243                 rdev->mec_fw = NULL;
2244                 release_firmware(rdev->mec2_fw);
2245                 rdev->mec2_fw = NULL;
2246                 release_firmware(rdev->rlc_fw);
2247                 rdev->rlc_fw = NULL;
2248                 release_firmware(rdev->sdma_fw);
2249                 rdev->sdma_fw = NULL;
2250                 release_firmware(rdev->mc_fw);
2251                 rdev->mc_fw = NULL;
2252                 release_firmware(rdev->smc_fw);
2253                 rdev->smc_fw = NULL;
2254         }
2255         return err;
2256 }
2257
2258 /*
2259  * Core functions
2260  */
2261 /**
2262  * cik_tiling_mode_table_init - init the hw tiling table
2263  *
2264  * @rdev: radeon_device pointer
2265  *
2266  * Starting with SI, the tiling setup is done globally in a
2267  * set of 32 tiling modes.  Rather than selecting each set of
2268  * parameters per surface as on older asics, we just select
2269  * which index in the tiling table we want to use, and the
2270  * surface uses those parameters (CIK).
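 * The selected values are also cached in rdev->config.cik so they can
 * be reported to userspace.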
2271  */
2272 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2273 {
2274         const u32 num_tile_mode_states = 32;
2275         const u32 num_secondary_tile_mode_states = 16;
2276         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2277         u32 num_pipe_configs;
2278         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2279                 rdev->config.cik.max_shader_engines;
2280
2281         switch (rdev->config.cik.mem_row_size_in_kb) {
2282         case 1:
2283                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2284                 break;
2285         case 2:
2286         default:
2287                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2288                 break;
2289         case 4:
2290                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2291                 break;
2292         }
2293
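        /* only 8- and 16-pipe layouts are programmed here; anything
         * larger is clamped to the 16-pipe configuration
         */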
2294         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2295         if (num_pipe_configs > 8)
2296                 num_pipe_configs = 16;
2297
2298         if (num_pipe_configs == 16) {
2299                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2300                         switch (reg_offset) {
2301                         case 0:
2302                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2304                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2306                                 break;
2307                         case 1:
2308                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2310                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2312                                 break;
2313                         case 2:
2314                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2316                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2318                                 break;
2319                         case 3:
2320                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2322                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2324                                 break;
2325                         case 4:
2326                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2328                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                                  TILE_SPLIT(split_equal_to_row_size));
2330                                 break;
2331                         case 5:
2332                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                                 break;
2336                         case 6:
2337                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2338                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2339                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2341                                 break;
2342                         case 7:
2343                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2344                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2345                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                                  TILE_SPLIT(split_equal_to_row_size));
2347                                 break;
2348                         case 8:
2349                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2350                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2351                                 break;
2352                         case 9:
2353                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2356                                 break;
2357                         case 10:
2358                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2359                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2362                                 break;
2363                         case 11:
2364                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2365                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2366                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2367                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368                                 break;
2369                         case 12:
2370                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374                                 break;
2375                         case 13:
2376                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2377                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2379                                 break;
2380                         case 14:
2381                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385                                 break;
2386                         case 16:
2387                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2388                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2389                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2390                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2391                                 break;
2392                         case 17:
2393                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397                                 break;
2398                         case 27:
2399                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2400                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2402                                 break;
2403                         case 28:
2404                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2406                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2408                                 break;
2409                         case 29:
2410                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2412                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414                                 break;
2415                         case 30:
2416                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                                 break;
2421                         default:
2422                                 gb_tile_moden = 0;
2423                                 break;
2424                         }
2425                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2426                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2427                 }
2428                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2429                         switch (reg_offset) {
2430                         case 0:
2431                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2433                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2434                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2435                                 break;
2436                         case 1:
2437                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2441                                 break;
2442                         case 2:
2443                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2445                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2446                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2447                                 break;
2448                         case 3:
2449                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2453                                 break;
2454                         case 4:
2455                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2457                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2458                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2459                                 break;
2460                         case 5:
2461                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2465                                 break;
2466                         case 6:
2467                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2471                                 break;
2472                         case 8:
2473                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2477                                 break;
2478                         case 9:
2479                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2481                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2482                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2483                                 break;
2484                         case 10:
2485                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2489                                 break;
2490                         case 11:
2491                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2494                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2495                                 break;
2496                         case 12:
2497                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2501                                 break;
2502                         case 13:
2503                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2505                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2506                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2507                                 break;
2508                         case 14:
2509                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2513                                 break;
2514                         default:
2515                                 gb_tile_moden = 0;
2516                                 break;
2517                         }
2518                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2519                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2520                 }
2521         } else if (num_pipe_configs == 8) {
2522                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2523                         switch (reg_offset) {
2524                         case 0:
2525                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2529                                 break;
2530                         case 1:
2531                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2533                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2535                                 break;
2536                         case 2:
2537                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2541                                 break;
2542                         case 3:
2543                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2545                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2547                                 break;
2548                         case 4:
2549                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                                  TILE_SPLIT(split_equal_to_row_size));
2553                                 break;
2554                         case 5:
2555                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2556                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2558                                 break;
2559                         case 6:
2560                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2561                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2562                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2564                                 break;
2565                         case 7:
2566                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2567                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2568                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569                                                  TILE_SPLIT(split_equal_to_row_size));
2570                                 break;
2571                         case 8:
2572                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2573                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2574                                 break;
2575                         case 9:
2576                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2579                                 break;
2580                         case 10:
2581                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585                                 break;
2586                         case 11:
2587                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2588                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591                                 break;
2592                         case 12:
2593                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2594                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2597                                 break;
2598                         case 13:
2599                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2600                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2602                                 break;
2603                         case 14:
2604                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2606                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2608                                 break;
2609                         case 16:
2610                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2611                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2612                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2613                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614                                 break;
2615                         case 17:
2616                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2617                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2618                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                                 break;
2621                         case 27:
2622                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2625                                 break;
2626                         case 28:
2627                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631                                 break;
2632                         case 29:
2633                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2635                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637                                 break;
2638                         case 30:
2639                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2640                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2641                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643                                 break;
2644                         default:
2645                                 gb_tile_moden = 0;
2646                                 break;
2647                         }
2648                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2649                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2650                 }
2651                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2652                         switch (reg_offset) {
2653                         case 0:
2654                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2656                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2657                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2658                                 break;
2659                         case 1:
2660                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2662                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2663                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2664                                 break;
2665                         case 2:
2666                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2669                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2670                                 break;
2671                         case 3:
2672                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2674                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2675                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2676                                 break;
2677                         case 4:
2678                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2681                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2682                                 break;
2683                         case 5:
2684                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2687                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2688                                 break;
2689                         case 6:
2690                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2693                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2694                                 break;
2695                         case 8:
2696                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2698                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2700                                 break;
2701                         case 9:
2702                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2704                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2705                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2706                                 break;
2707                         case 10:
2708                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2710                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2712                                 break;
2713                         case 11:
2714                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2718                                 break;
2719                         case 12:
2720                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2723                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2724                                 break;
2725                         case 13:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2730                                 break;
2731                         case 14:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2735                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2736                                 break;
2737                         default:
2738                                 gb_tile_moden = 0;
2739                                 break;
2740                         }
2741                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2742                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2743                 }
2744         } else if (num_pipe_configs == 4) {
2745                 if (num_rbs == 4) {
2746                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2747                                 switch (reg_offset) {
2748                                 case 0:
2749                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2753                                         break;
2754                                 case 1:
2755                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2756                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2757                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2759                                         break;
2760                                 case 2:
2761                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2765                                         break;
2766                                 case 3:
2767                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2768                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2769                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2771                                         break;
2772                                 case 4:
2773                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776                                                          TILE_SPLIT(split_equal_to_row_size));
2777                                         break;
2778                                 case 5:
2779                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2780                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2782                                         break;
2783                                 case 6:
2784                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2785                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2788                                         break;
2789                                 case 7:
2790                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                                          TILE_SPLIT(split_equal_to_row_size));
2794                                         break;
2795                                 case 8:
2796                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2797                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2798                                         break;
2799                                 case 9:
2800                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2803                                         break;
2804                                 case 10:
2805                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809                                         break;
2810                                 case 11:
2811                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2812                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                                         break;
2816                                 case 12:
2817                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2818                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2821                                         break;
2822                                 case 13:
2823                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2824                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2826                                         break;
2827                                 case 14:
2828                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2829                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2830                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2832                                         break;
2833                                 case 16:
2834                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2835                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2836                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2838                                         break;
2839                                 case 17:
2840                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2841                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2842                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844                                         break;
2845                                 case 27:
2846                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2847                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2849                                         break;
2850                                 case 28:
2851                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2852                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2853                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855                                         break;
2856                                 case 29:
2857                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2859                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2860                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2861                                         break;
2862                                 case 30:
2863                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2864                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2865                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867                                         break;
2868                                 default:
2869                                         gb_tile_moden = 0;
2870                                         break;
2871                                 }
2872                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2873                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2874                         }
2875                 } else if (num_rbs < 4) {
2876                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2877                                 switch (reg_offset) {
2878                                 case 0:
2879                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2881                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2882                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2883                                         break;
2884                                 case 1:
2885                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2888                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889                                         break;
2890                                 case 2:
2891                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2894                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895                                         break;
2896                                 case 3:
2897                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2900                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2901                                         break;
2902                                 case 4:
2903                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2906                                                          TILE_SPLIT(split_equal_to_row_size));
2907                                         break;
2908                                 case 5:
2909                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2910                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2911                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912                                         break;
2913                                 case 6:
2914                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2915                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2916                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2917                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2918                                         break;
2919                                 case 7:
2920                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2921                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2922                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2923                                                          TILE_SPLIT(split_equal_to_row_size));
2924                                         break;
2925                                 case 8:
2926                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2927                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2928                                         break;
2929                                 case 9:
2930                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2933                                         break;
2934                                 case 10:
2935                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2936                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2938                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                                         break;
2940                                 case 11:
2941                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2942                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2944                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                                         break;
2946                                 case 12:
2947                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2950                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                                         break;
2952                                 case 13:
2953                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2955                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2956                                         break;
2957                                 case 14:
2958                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2959                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2961                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962                                         break;
2963                                 case 16:
2964                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2967                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968                                         break;
2969                                 case 17:
2970                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2973                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974                                         break;
2975                                 case 27:
2976                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2977                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2979                                         break;
2980                                 case 28:
2981                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2982                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2983                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2984                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985                                         break;
2986                                 case 29:
2987                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2988                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2989                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2990                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2991                                         break;
2992                                 case 30:
2993                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2994                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2995                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2996                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997                                         break;
2998                                 default:
2999                                         gb_tile_moden = 0;
3000                                         break;
3001                                 }
3002                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3003                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3004                         }
3005                 }
3006                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3007                         switch (reg_offset) {
3008                         case 0:
3009                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3013                                 break;
3014                         case 1:
3015                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3019                                 break;
3020                         case 2:
3021                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3023                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3024                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3025                                 break;
3026                         case 3:
3027                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3031                                 break;
3032                         case 4:
3033                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3036                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3037                                 break;
3038                         case 5:
3039                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3041                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3043                                 break;
3044                         case 6:
3045                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3048                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3049                                 break;
3050                         case 8:
3051                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3052                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3053                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3054                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3055                                 break;
3056                         case 9:
3057                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3058                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3059                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3060                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3061                                 break;
3062                         case 10:
3063                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3065                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3067                                 break;
3068                         case 11:
3069                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3072                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3073                                 break;
3074                         case 12:
3075                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3078                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3079                                 break;
3080                         case 13:
3081                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3084                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3085                                 break;
3086                         case 14:
3087                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3089                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3090                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3091                                 break;
3092                         default:
3093                                 gb_tile_moden = 0;
3094                                 break;
3095                         }
3096                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3097                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3098                 }
3099         } else if (num_pipe_configs == 2) {
3100                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3101                         switch (reg_offset) {
3102                         case 0:
3103                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3105                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3106                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3107                                 break;
3108                         case 1:
3109                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3111                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3112                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3113                                 break;
3114                         case 2:
3115                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3117                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3118                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3119                                 break;
3120                         case 3:
3121                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3123                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3124                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3125                                 break;
3126                         case 4:
3127                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3128                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3129                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3130                                                  TILE_SPLIT(split_equal_to_row_size));
3131                                 break;
3132                         case 5:
3133                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3135                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3136                                 break;
3137                         case 6:
3138                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3139                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3140                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3142                                 break;
3143                         case 7:
3144                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3145                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3146                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3147                                                  TILE_SPLIT(split_equal_to_row_size));
3148                                 break;
3149                         case 8:
3150                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3151                                                  PIPE_CONFIG(ADDR_SURF_P2));
3152                                 break;
3153                         case 9:
3154                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3155                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3156                                                  PIPE_CONFIG(ADDR_SURF_P2));
3157                                 break;
3158                         case 10:
3159                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3160                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3161                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3162                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163                                 break;
3164                         case 11:
3165                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3166                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3167                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3168                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3169                                 break;
3170                         case 12:
3171                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3172                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3173                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3175                                 break;
3176                         case 13:
3177                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3178                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3179                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3180                                 break;
3181                         case 14:
3182                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3183                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3186                                 break;
3187                         case 16:
3188                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3189                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3190                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3191                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192                                 break;
3193                         case 17:
3194                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3195                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3196                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3197                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3198                                 break;
3199                         case 27:
3200                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3201                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202                                                  PIPE_CONFIG(ADDR_SURF_P2));
3203                                 break;
3204                         case 28:
3205                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3206                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3207                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3208                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3209                                 break;
3210                         case 29:
3211                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3212                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3213                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3214                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215                                 break;
3216                         case 30:
3217                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3218                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3219                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3220                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3221                                 break;
3222                         default:
3223                                 gb_tile_moden = 0;
3224                                 break;
3225                         }
3226                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3227                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3228                 }
3229                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3230                         switch (reg_offset) {
3231                         case 0:
3232                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3234                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3236                                 break;
3237                         case 1:
3238                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3239                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3240                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3242                                 break;
3243                         case 2:
3244                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3246                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3248                                 break;
3249                         case 3:
3250                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3252                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3254                                 break;
3255                         case 4:
3256                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3258                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3260                                 break;
3261                         case 5:
3262                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3266                                 break;
3267                         case 6:
3268                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3270                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3271                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3272                                 break;
3273                         case 8:
3274                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3275                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3276                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3278                                 break;
3279                         case 9:
3280                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3281                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3282                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3283                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3284                                 break;
3285                         case 10:
3286                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3287                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3288                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3289                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3290                                 break;
3291                         case 11:
3292                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3293                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3294                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3296                                 break;
3297                         case 12:
3298                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3299                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3300                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3302                                 break;
3303                         case 13:
3304                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3308                                 break;
3309                         case 14:
3310                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3314                                 break;
3315                         default:
3316                                 gb_tile_moden = 0;
3317                                 break;
3318                         }
3319                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3320                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3321                 }
3322         } else
3323                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3324 }
3325
3326 /**
3327  * cik_select_se_sh - select which SE, SH to address
3328  *
3329  * @rdev: radeon_device pointer
3330  * @se_num: shader engine to address
3331  * @sh_num: sh block to address
3332  *
3333  * Select which SE, SH combinations to address. Certain
3334  * registers are instanced per SE or SH.  0xffffffff means
3335  * broadcast to all SEs or SHs (CIK).
3336  */
3337 static void cik_select_se_sh(struct radeon_device *rdev,
3338                              u32 se_num, u32 sh_num)
3339 {
3340         u32 data = INSTANCE_BROADCAST_WRITES;
3341
3342         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3343                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3344         else if (se_num == 0xffffffff)
3345                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3346         else if (sh_num == 0xffffffff)
3347                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3348         else
3349                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3350         WREG32(GRBM_GFX_INDEX, data);
3351 }
3352
3353 /**
3354  * cik_create_bitmask - create a bitmask
3355  *
3356  * @bit_width: length of the mask
3357  *
3358  * create a variable length bit mask (CIK).
3359  * Returns the bitmask.
3360  */
3361 static u32 cik_create_bitmask(u32 bit_width)
3362 {
3363         u32 i, mask = 0;
3364
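        /* shift in one set bit per iteration, e.g. bit_width = 4 yields 0xf */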
3365         for (i = 0; i < bit_width; i++) {
3366                 mask <<= 1;
3367                 mask |= 1;
3368         }
3369         return mask;
3370 }
3371
3372 /**
3373  * cik_get_rb_disabled - computes the mask of disabled RBs
3374  *
3375  * @rdev: radeon_device pointer
3376  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3378  * @sh_per_se: number of SH blocks per SE for the asic
3379  *
3380  * Calculates the bitmask of disabled RBs (CIK).
3381  * Returns the disabled RB bitmask.
3382  */
3383 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3384                               u32 max_rb_num_per_se,
3385                               u32 sh_per_se)
3386 {
3387         u32 data, mask;
3388
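        /* combine the per-asic (CC) and user (GC_USER) RB disable fields */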
3389         data = RREG32(CC_RB_BACKEND_DISABLE);
3390         if (data & 1)
3391                 data &= BACKEND_DISABLE_MASK;
3392         else
3393                 data = 0;
3394         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3395
3396         data >>= BACKEND_DISABLE_SHIFT;
3397
3398         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3399
3400         return data & mask;
3401 }
3402
3403 /**
3404  * cik_setup_rb - setup the RBs on the asic
3405  *
3406  * @rdev: radeon_device pointer
3407  * @se_num: number of SEs (shader engines) for the asic
3408  * @sh_per_se: number of SH blocks per SE for the asic
3409  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3410  *
3411  * Configures per-SE/SH RB registers (CIK).
3412  */
3413 static void cik_setup_rb(struct radeon_device *rdev,
3414                          u32 se_num, u32 sh_per_se,
3415                          u32 max_rb_num_per_se)
3416 {
3417         int i, j;
3418         u32 data, mask;
3419         u32 disabled_rbs = 0;
3420         u32 enabled_rbs = 0;
3421
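        /* collect the disabled RB bitmap across every SE/SH */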
3422         for (i = 0; i < se_num; i++) {
3423                 for (j = 0; j < sh_per_se; j++) {
3424                         cik_select_se_sh(rdev, i, j);
3425                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3426                         if (rdev->family == CHIP_HAWAII)
3427                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3428                         else
3429                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3430                 }
3431         }
3432         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3433
3434         mask = 1;
3435         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3436                 if (!(disabled_rbs & mask))
3437                         enabled_rbs |= mask;
3438                 mask <<= 1;
3439         }
3440
3441         rdev->config.cik.backend_enable_mask = enabled_rbs;
3442
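        /* program the raster configuration for each SE based on which RBs are enabled */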
3443         for (i = 0; i < se_num; i++) {
3444                 cik_select_se_sh(rdev, i, 0xffffffff);
3445                 data = 0;
3446                 for (j = 0; j < sh_per_se; j++) {
3447                         switch (enabled_rbs & 3) {
3448                         case 0:
3449                                 if (j == 0)
3450                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3451                                 else
3452                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3453                                 break;
3454                         case 1:
3455                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3456                                 break;
3457                         case 2:
3458                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3459                                 break;
3460                         case 3:
3461                         default:
3462                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3463                                 break;
3464                         }
3465                         enabled_rbs >>= 2;
3466                 }
3467                 WREG32(PA_SC_RASTER_CONFIG, data);
3468         }
3469         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3470 }
3471
3472 /**
3473  * cik_gpu_init - setup the 3D engine
3474  *
3475  * @rdev: radeon_device pointer
3476  *
3477  * Configures the 3D engine and tiling configuration
3478  * registers so that the 3D engine is usable.
3479  */
3480 static void cik_gpu_init(struct radeon_device *rdev)
3481 {
3482         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3483         u32 mc_shared_chmap, mc_arb_ramcfg;
3484         u32 hdp_host_path_cntl;
3485         u32 tmp;
3486         int i, j;
3487
3488         switch (rdev->family) {
3489         case CHIP_BONAIRE:
3490                 rdev->config.cik.max_shader_engines = 2;
3491                 rdev->config.cik.max_tile_pipes = 4;
3492                 rdev->config.cik.max_cu_per_sh = 7;
3493                 rdev->config.cik.max_sh_per_se = 1;
3494                 rdev->config.cik.max_backends_per_se = 2;
3495                 rdev->config.cik.max_texture_channel_caches = 4;
3496                 rdev->config.cik.max_gprs = 256;
3497                 rdev->config.cik.max_gs_threads = 32;
3498                 rdev->config.cik.max_hw_contexts = 8;
3499
3500                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3501                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3502                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3503                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3504                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3505                 break;
3506         case CHIP_HAWAII:
3507                 rdev->config.cik.max_shader_engines = 4;
3508                 rdev->config.cik.max_tile_pipes = 16;
3509                 rdev->config.cik.max_cu_per_sh = 11;
3510                 rdev->config.cik.max_sh_per_se = 1;
3511                 rdev->config.cik.max_backends_per_se = 4;
3512                 rdev->config.cik.max_texture_channel_caches = 16;
3513                 rdev->config.cik.max_gprs = 256;
3514                 rdev->config.cik.max_gs_threads = 32;
3515                 rdev->config.cik.max_hw_contexts = 8;
3516
3517                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3522                 break;
3523         case CHIP_KAVERI:
3524                 rdev->config.cik.max_shader_engines = 1;
3525                 rdev->config.cik.max_tile_pipes = 4;
3526                 if ((rdev->pdev->device == 0x1304) ||
3527                     (rdev->pdev->device == 0x1305) ||
3528                     (rdev->pdev->device == 0x130C) ||
3529                     (rdev->pdev->device == 0x130F) ||
3530                     (rdev->pdev->device == 0x1310) ||
3531                     (rdev->pdev->device == 0x1311) ||
3532                     (rdev->pdev->device == 0x131C)) {
3533                         rdev->config.cik.max_cu_per_sh = 8;
3534                         rdev->config.cik.max_backends_per_se = 2;
3535                 } else if ((rdev->pdev->device == 0x1309) ||
3536                            (rdev->pdev->device == 0x130A) ||
3537                            (rdev->pdev->device == 0x130D) ||
3538                            (rdev->pdev->device == 0x1313) ||
3539                            (rdev->pdev->device == 0x131D)) {
3540                         rdev->config.cik.max_cu_per_sh = 6;
3541                         rdev->config.cik.max_backends_per_se = 2;
3542                 } else if ((rdev->pdev->device == 0x1306) ||
3543                            (rdev->pdev->device == 0x1307) ||
3544                            (rdev->pdev->device == 0x130B) ||
3545                            (rdev->pdev->device == 0x130E) ||
3546                            (rdev->pdev->device == 0x1315) ||
3547                            (rdev->pdev->device == 0x131B)) {
3548                         rdev->config.cik.max_cu_per_sh = 4;
3549                         rdev->config.cik.max_backends_per_se = 1;
3550                 } else {
3551                         rdev->config.cik.max_cu_per_sh = 3;
3552                         rdev->config.cik.max_backends_per_se = 1;
3553                 }
3554                 rdev->config.cik.max_sh_per_se = 1;
3555                 rdev->config.cik.max_texture_channel_caches = 4;
3556                 rdev->config.cik.max_gprs = 256;
3557                 rdev->config.cik.max_gs_threads = 16;
3558                 rdev->config.cik.max_hw_contexts = 8;
3559
3560                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3561                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3562                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3563                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3564                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3565                 break;
3566         case CHIP_KABINI:
3567         case CHIP_MULLINS:
3568         default:
3569                 rdev->config.cik.max_shader_engines = 1;
3570                 rdev->config.cik.max_tile_pipes = 2;
3571                 rdev->config.cik.max_cu_per_sh = 2;
3572                 rdev->config.cik.max_sh_per_se = 1;
3573                 rdev->config.cik.max_backends_per_se = 1;
3574                 rdev->config.cik.max_texture_channel_caches = 2;
3575                 rdev->config.cik.max_gprs = 256;
3576                 rdev->config.cik.max_gs_threads = 16;
3577                 rdev->config.cik.max_hw_contexts = 8;
3578
3579                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3580                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3581                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3582                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3583                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3584                 break;
3585         }
3586
3587         /* Initialize HDP */
3588         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3589                 WREG32((0x2c14 + j), 0x00000000);
3590                 WREG32((0x2c18 + j), 0x00000000);
3591                 WREG32((0x2c1c + j), 0x00000000);
3592                 WREG32((0x2c20 + j), 0x00000000);
3593                 WREG32((0x2c24 + j), 0x00000000);
3594         }
3595
3596         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3597
3598         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3599
3600         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3601         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3602
3603         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3604         rdev->config.cik.mem_max_burst_length_bytes = 256;
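        /* derive the memory row size in KB from the number of columns, capped at 4 KB */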
3605         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3606         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3607         if (rdev->config.cik.mem_row_size_in_kb > 4)
3608                 rdev->config.cik.mem_row_size_in_kb = 4;
3609         /* XXX use MC settings? */
3610         rdev->config.cik.shader_engine_tile_size = 32;
3611         rdev->config.cik.num_gpus = 1;
3612         rdev->config.cik.multi_gpu_tile_size = 64;
3613
3614         /* fix up row size */
3615         gb_addr_config &= ~ROW_SIZE_MASK;
3616         switch (rdev->config.cik.mem_row_size_in_kb) {
3617         case 1:
3618         default:
3619                 gb_addr_config |= ROW_SIZE(0);
3620                 break;
3621         case 2:
3622                 gb_addr_config |= ROW_SIZE(1);
3623                 break;
3624         case 4:
3625                 gb_addr_config |= ROW_SIZE(2);
3626                 break;
3627         }
3628
3629         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3630          * not have bank info, so create a custom tiling dword.
3631          * bits 3:0   num_pipes
3632          * bits 7:4   num_banks
3633          * bits 11:8  group_size
3634          * bits 15:12 row_size
3635          */
3636         rdev->config.cik.tile_config = 0;
3637         switch (rdev->config.cik.num_tile_pipes) {
3638         case 1:
3639                 rdev->config.cik.tile_config |= (0 << 0);
3640                 break;
3641         case 2:
3642                 rdev->config.cik.tile_config |= (1 << 0);
3643                 break;
3644         case 4:
3645                 rdev->config.cik.tile_config |= (2 << 0);
3646                 break;
3647         case 8:
3648         default:
3649                 /* XXX what about 12? */
3650                 rdev->config.cik.tile_config |= (3 << 0);
3651                 break;
3652         }
3653         rdev->config.cik.tile_config |=
3654                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3655         rdev->config.cik.tile_config |=
3656                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3657         rdev->config.cik.tile_config |=
3658                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3659
3660         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3661         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3662         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3663         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3664         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3665         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3666         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3667         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3668
3669         cik_tiling_mode_table_init(rdev);
3670
3671         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3672                      rdev->config.cik.max_sh_per_se,
3673                      rdev->config.cik.max_backends_per_se);
3674
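        /* count the active CUs across all SE/SH pairs */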
3675         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3676                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3677                         rdev->config.cik.active_cus +=
3678                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3679                 }
3680         }
3681
3682         /* set HW defaults for 3D engine */
3683         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3684
3685         WREG32(SX_DEBUG_1, 0x20);
3686
3687         WREG32(TA_CNTL_AUX, 0x00010000);
3688
3689         tmp = RREG32(SPI_CONFIG_CNTL);
3690         tmp |= 0x03000000;
3691         WREG32(SPI_CONFIG_CNTL, tmp);
3692
3693         WREG32(SQ_CONFIG, 1);
3694
3695         WREG32(DB_DEBUG, 0);
3696
3697         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3698         tmp |= 0x00000400;
3699         WREG32(DB_DEBUG2, tmp);
3700
3701         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3702         tmp |= 0x00020200;
3703         WREG32(DB_DEBUG3, tmp);
3704
3705         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3706         tmp |= 0x00018208;
3707         WREG32(CB_HW_CONTROL, tmp);
3708
3709         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3710
3711         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3712                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3713                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3714                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3715
3716         WREG32(VGT_NUM_INSTANCES, 1);
3717
3718         WREG32(CP_PERFMON_CNTL, 0);
3719
3720         WREG32(SQ_CONFIG, 0);
3721
3722         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3723                                           FORCE_EOV_MAX_REZ_CNT(255)));
3724
3725         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3726                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3727
3728         WREG32(VGT_GS_VERTEX_REUSE, 16);
3729         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3730
3731         tmp = RREG32(HDP_MISC_CNTL);
3732         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3733         WREG32(HDP_MISC_CNTL, tmp);
3734
3735         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3736         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3737
3738         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3739         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3740
3741         udelay(50);
3742 }
3743
3744 /*
3745  * GPU scratch register helper functions.
3746  */
3747 /**
3748  * cik_scratch_init - setup driver info for CP scratch regs
3749  *
3750  * @rdev: radeon_device pointer
3751  *
3752  * Set up the number and offset of the CP scratch registers.
3753  * NOTE: use of CP scratch registers is a legacy interface and
3754  * is not used by default on newer asics (r6xx+).  On newer asics,
3755  * memory buffers are used for fences rather than scratch regs.
3756  */
3757 static void cik_scratch_init(struct radeon_device *rdev)
3758 {
3759         int i;
3760
3761         rdev->scratch.num_reg = 7;
3762         rdev->scratch.reg_base = SCRATCH_REG0;
3763         for (i = 0; i < rdev->scratch.num_reg; i++) {
3764                 rdev->scratch.free[i] = true;
3765                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3766         }
3767 }
3768
3769 /**
3770  * cik_ring_test - basic gfx ring test
3771  *
3772  * @rdev: radeon_device pointer
3773  * @ring: radeon_ring structure holding ring information
3774  *
3775  * Allocate a scratch register and write to it using the gfx ring (CIK).
3776  * Provides a basic gfx ring test to verify that the ring is working.
3777  * Used by cik_cp_gfx_resume().
3778  * Returns 0 on success, error on failure.
3779  */
3780 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3781 {
3782         uint32_t scratch;
3783         uint32_t tmp = 0;
3784         unsigned i;
3785         int r;
3786
3787         r = radeon_scratch_get(rdev, &scratch);
3788         if (r) {
3789                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3790                 return r;
3791         }
3792         WREG32(scratch, 0xCAFEDEAD);
3793         r = radeon_ring_lock(rdev, ring, 3);
3794         if (r) {
3795                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3796                 radeon_scratch_free(rdev, scratch);
3797                 return r;
3798         }
3799         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3800         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3801         radeon_ring_write(ring, 0xDEADBEEF);
3802         radeon_ring_unlock_commit(rdev, ring, false);
3803
3804         for (i = 0; i < rdev->usec_timeout; i++) {
3805                 tmp = RREG32(scratch);
3806                 if (tmp == 0xDEADBEEF)
3807                         break;
3808                 DRM_UDELAY(1);
3809         }
3810         if (i < rdev->usec_timeout) {
3811                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3812         } else {
3813                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3814                           ring->idx, scratch, tmp);
3815                 r = -EINVAL;
3816         }
3817         radeon_scratch_free(rdev, scratch);
3818         return r;
3819 }
3820
3821 /**
3822  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3823  *
3824  * @rdev: radeon_device pointer
3825  * @ridx: radeon ring index
3826  *
3827  * Emits an hdp flush on the cp.
3828  */
3829 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3830                                        int ridx)
3831 {
3832         struct radeon_ring *ring = &rdev->ring[ridx];
3833         u32 ref_and_mask;
3834
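        /* pick the GPU_HDP_FLUSH req/done bit matching this ring's ME and pipe */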
3835         switch (ring->idx) {
3836         case CAYMAN_RING_TYPE_CP1_INDEX:
3837         case CAYMAN_RING_TYPE_CP2_INDEX:
3838         default:
3839                 switch (ring->me) {
3840                 case 0:
3841                         ref_and_mask = CP2 << ring->pipe;
3842                         break;
3843                 case 1:
3844                         ref_and_mask = CP6 << ring->pipe;
3845                         break;
3846                 default:
3847                         return;
3848                 }
3849                 break;
3850         case RADEON_RING_TYPE_GFX_INDEX:
3851                 ref_and_mask = CP0;
3852                 break;
3853         }
3854
3855         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3856         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3857                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3858                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3859         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3860         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3861         radeon_ring_write(ring, ref_and_mask);
3862         radeon_ring_write(ring, ref_and_mask);
3863         radeon_ring_write(ring, 0x20); /* poll interval */
3864 }
3865
3866 /**
3867  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3868  *
3869  * @rdev: radeon_device pointer
3870  * @fence: radeon fence object
3871  *
3872  * Emits a fence sequence number on the gfx ring and flushes
3873  * GPU caches.
3874  */
3875 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3876                              struct radeon_fence *fence)
3877 {
3878         struct radeon_ring *ring = &rdev->ring[fence->ring];
3879         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3880
3881         /* EVENT_WRITE_EOP - flush caches, send int */
3882         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3883         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3884                                  EOP_TC_ACTION_EN |
3885                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3886                                  EVENT_INDEX(5)));
3887         radeon_ring_write(ring, addr & 0xfffffffc);
3888         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3889         radeon_ring_write(ring, fence->seq);
3890         radeon_ring_write(ring, 0);
3891 }
3892
3893 /**
3894  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3895  *
3896  * @rdev: radeon_device pointer
3897  * @fence: radeon fence object
3898  *
3899  * Emits a fence sequence number on the compute ring and flushes
3900  * GPU caches.
3901  */
3902 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3903                                  struct radeon_fence *fence)
3904 {
3905         struct radeon_ring *ring = &rdev->ring[fence->ring];
3906         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3907
3908         /* RELEASE_MEM - flush caches, send int */
3909         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3910         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3911                                  EOP_TC_ACTION_EN |
3912                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3913                                  EVENT_INDEX(5)));
3914         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3915         radeon_ring_write(ring, addr & 0xfffffffc);
3916         radeon_ring_write(ring, upper_32_bits(addr));
3917         radeon_ring_write(ring, fence->seq);
3918         radeon_ring_write(ring, 0);
3919 }
3920
3921 /**
3922  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3923  *
3924  * @rdev: radeon_device pointer
3925  * @ring: radeon ring buffer object
3926  * @semaphore: radeon semaphore object
3927  * @emit_wait: Is this a semaphore wait?
3928  *
3929  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3930  * from running ahead of semaphore waits.
3931  */
3932 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3933                              struct radeon_ring *ring,
3934                              struct radeon_semaphore *semaphore,
3935                              bool emit_wait)
3936 {
3937         uint64_t addr = semaphore->gpu_addr;
3938         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3939
3940         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3941         radeon_ring_write(ring, lower_32_bits(addr));
3942         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3943
3944         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3945                 /* Prevent the PFP from running ahead of the semaphore wait */
3946                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3947                 radeon_ring_write(ring, 0x0);
3948         }
3949
3950         return true;
3951 }
3952
3953 /**
3954  * cik_copy_cpdma - copy pages using the CP DMA engine
3955  *
3956  * @rdev: radeon_device pointer
3957  * @src_offset: src GPU address
3958  * @dst_offset: dst GPU address
3959  * @num_gpu_pages: number of GPU pages to xfer
3960  * @fence: radeon fence object
3961  *
3962  * Copy GPU pages using the CP DMA engine (CIK+).
3963  * Used by the radeon ttm implementation to move pages if
3964  * registered as the asic copy callback.
3965  */
3966 int cik_copy_cpdma(struct radeon_device *rdev,
3967                    uint64_t src_offset, uint64_t dst_offset,
3968                    unsigned num_gpu_pages,
3969                    struct radeon_fence **fence)
3970 {
3971         struct radeon_semaphore *sem = NULL;
3972         int ring_index = rdev->asic->copy.blit_ring_index;
3973         struct radeon_ring *ring = &rdev->ring[ring_index];
3974         u32 size_in_bytes, cur_size_in_bytes, control;
3975         int i, num_loops;
3976         int r = 0;
3977
3978         r = radeon_semaphore_create(rdev, &sem);
3979         if (r) {
3980                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3981                 return r;
3982         }
3983
3984         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
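        /* a single DMA_DATA packet copies at most 0x1fffff bytes, so split the copy into chunks */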
3985         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3986         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3987         if (r) {
3988                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3989                 radeon_semaphore_free(rdev, &sem, NULL);
3990                 return r;
3991         }
3992
3993         radeon_semaphore_sync_to(sem, *fence);
3994         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3995
3996         for (i = 0; i < num_loops; i++) {
3997                 cur_size_in_bytes = size_in_bytes;
3998                 if (cur_size_in_bytes > 0x1fffff)
3999                         cur_size_in_bytes = 0x1fffff;
4000                 size_in_bytes -= cur_size_in_bytes;
4001                 control = 0;
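                /* on the final chunk, request CP_SYNC so following packets wait for the DMA */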
4002                 if (size_in_bytes == 0)
4003                         control |= PACKET3_DMA_DATA_CP_SYNC;
4004                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4005                 radeon_ring_write(ring, control);
4006                 radeon_ring_write(ring, lower_32_bits(src_offset));
4007                 radeon_ring_write(ring, upper_32_bits(src_offset));
4008                 radeon_ring_write(ring, lower_32_bits(dst_offset));
4009                 radeon_ring_write(ring, upper_32_bits(dst_offset));
4010                 radeon_ring_write(ring, cur_size_in_bytes);
4011                 src_offset += cur_size_in_bytes;
4012                 dst_offset += cur_size_in_bytes;
4013         }
4014
4015         r = radeon_fence_emit(rdev, fence, ring->idx);
4016         if (r) {
4017                 radeon_ring_unlock_undo(rdev, ring);
4018                 radeon_semaphore_free(rdev, &sem, NULL);
4019                 return r;
4020         }
4021
4022         radeon_ring_unlock_commit(rdev, ring, false);
4023         radeon_semaphore_free(rdev, &sem, *fence);
4024
4025         return r;
4026 }
4027
4028 /*
4029  * IB stuff
4030  */
4031 /**
4032  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4033  *
4034  * @rdev: radeon_device pointer
4035  * @ib: radeon indirect buffer object
4036  *
4037  * Emits a DE (drawing engine) or CE (constant engine) IB
4038  * on the gfx ring.  IBs are usually generated by userspace
4039  * acceleration drivers and submitted to the kernel for
4040  * scheduling on the ring.  This function schedules the IB
4041  * on the gfx ring for execution by the GPU.
4042  */
4043 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4044 {
4045         struct radeon_ring *ring = &rdev->ring[ib->ring];
4046         u32 header, control = INDIRECT_BUFFER_VALID;
4047
4048         if (ib->is_const_ib) {
4049                 /* set switch buffer packet before const IB */
4050                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4051                 radeon_ring_write(ring, 0);
4052
4053                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4054         } else {
4055                 u32 next_rptr;
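                /* record the ring position that will follow this IB submission */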
4056                 if (ring->rptr_save_reg) {
4057                         next_rptr = ring->wptr + 3 + 4;
4058                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4059                         radeon_ring_write(ring, ((ring->rptr_save_reg -
4060                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
4061                         radeon_ring_write(ring, next_rptr);
4062                 } else if (rdev->wb.enabled) {
4063                         next_rptr = ring->wptr + 5 + 4;
4064                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4065                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4066                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4067                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4068                         radeon_ring_write(ring, next_rptr);
4069                 }
4070
4071                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4072         }
4073
4074         control |= ib->length_dw |
4075                 (ib->vm ? (ib->vm->id << 24) : 0);
4076
4077         radeon_ring_write(ring, header);
4078         radeon_ring_write(ring,
4079 #ifdef __BIG_ENDIAN
4080                           (2 << 0) |
4081 #endif
4082                           (ib->gpu_addr & 0xFFFFFFFC));
4083         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4084         radeon_ring_write(ring, control);
4085 }
4086
4087 /**
4088  * cik_ib_test - basic gfx ring IB test
4089  *
4090  * @rdev: radeon_device pointer
4091  * @ring: radeon_ring structure holding ring information
4092  *
4093  * Allocate an IB and execute it on the gfx ring (CIK).
4094  * Provides a basic gfx ring test to verify that IBs are working.
4095  * Returns 0 on success, error on failure.
4096  */
4097 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4098 {
4099         struct radeon_ib ib;
4100         uint32_t scratch;
4101         uint32_t tmp = 0;
4102         unsigned i;
4103         int r;
4104
4105         r = radeon_scratch_get(rdev, &scratch);
4106         if (r) {
4107                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4108                 return r;
4109         }
4110         WREG32(scratch, 0xCAFEDEAD);
4111         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4112         if (r) {
4113                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4114                 radeon_scratch_free(rdev, scratch);
4115                 return r;
4116         }
4117         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4118         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4119         ib.ptr[2] = 0xDEADBEEF;
4120         ib.length_dw = 3;
4121         r = radeon_ib_schedule(rdev, &ib, NULL, false);
4122         if (r) {
4123                 radeon_scratch_free(rdev, scratch);
4124                 radeon_ib_free(rdev, &ib);
4125                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4126                 return r;
4127         }
4128         r = radeon_fence_wait(ib.fence, false);
4129         if (r) {
4130                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4131                 radeon_scratch_free(rdev, scratch);
4132                 radeon_ib_free(rdev, &ib);
4133                 return r;
4134         }
4135         for (i = 0; i < rdev->usec_timeout; i++) {
4136                 tmp = RREG32(scratch);
4137                 if (tmp == 0xDEADBEEF)
4138                         break;
4139                 DRM_UDELAY(1);
4140         }
4141         if (i < rdev->usec_timeout) {
4142                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4143         } else {
4144                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4145                           scratch, tmp);
4146                 r = -EINVAL;
4147         }
4148         radeon_scratch_free(rdev, scratch);
4149         radeon_ib_free(rdev, &ib);
4150         return r;
4151 }
4152
4153 /*
4154  * CP.
4155  * On CIK, gfx and compute now have independent command processors.
4156  *
4157  * GFX
4158  * Gfx consists of a single ring and can process both gfx jobs and
4159  * compute jobs.  The gfx CP consists of three microengines (ME):
4160  * PFP - Pre-Fetch Parser
4161  * ME - Micro Engine
4162  * CE - Constant Engine
4163  * The PFP and ME make up what is considered the Drawing Engine (DE).
4164  * The CE is an asynchronous engine used for updating buffer descriptors
4165  * used by the DE so that they can be loaded into cache in parallel
4166  * while the DE is processing state update packets.
4167  *
4168  * Compute
4169  * The compute CP consists of two microengines (ME):
4170  * MEC1 - Compute MicroEngine 1
4171  * MEC2 - Compute MicroEngine 2
4172  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4173  * The queues are exposed to userspace and are programmed directly
4174  * by the compute runtime.
4175  */
4176 /**
4177  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4178  *
4179  * @rdev: radeon_device pointer
4180  * @enable: enable or disable the MEs
4181  *
4182  * Halts or unhalts the gfx MEs.
4183  */
4184 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4185 {
4186         if (enable)
4187                 WREG32(CP_ME_CNTL, 0);
4188         else {
4189                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4190                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4191                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4192                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4193         }
4194         udelay(50);
4195 }
4196
4197 /**
4198  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4199  *
4200  * @rdev: radeon_device pointer
4201  *
4202  * Loads the gfx PFP, ME, and CE ucode.
4203  * Returns 0 for success, -EINVAL if the ucode is not available.
4204  */
4205 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4206 {
4207         int i;
4208
4209         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4210                 return -EINVAL;
4211
4212         cik_cp_gfx_enable(rdev, false);
4213
4214         if (rdev->new_fw) {
4215                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4216                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4217                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4218                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4219                 const struct gfx_firmware_header_v1_0 *me_hdr =
4220                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4221                 const __le32 *fw_data;
4222                 u32 fw_size;
4223
4224                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4225                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4226                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4227
4228                 /* PFP */
4229                 fw_data = (const __le32 *)
4230                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4231                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4232                 WREG32(CP_PFP_UCODE_ADDR, 0);
4233                 for (i = 0; i < fw_size; i++)
4234                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4235                 WREG32(CP_PFP_UCODE_ADDR, 0);
4236
4237                 /* CE */
4238                 fw_data = (const __le32 *)
4239                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4240                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4241                 WREG32(CP_CE_UCODE_ADDR, 0);
4242                 for (i = 0; i < fw_size; i++)
4243                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4244                 WREG32(CP_CE_UCODE_ADDR, 0);
4245
4246                 /* ME */
4247                 fw_data = (const __le32 *)
4248                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4249                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4250                 WREG32(CP_ME_RAM_WADDR, 0);
4251                 for (i = 0; i < fw_size; i++)
4252                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4253                 WREG32(CP_ME_RAM_WADDR, 0);
4254         } else {
4255                 const __be32 *fw_data;
4256
4257                 /* PFP */
4258                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4259                 WREG32(CP_PFP_UCODE_ADDR, 0);
4260                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4261                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4262                 WREG32(CP_PFP_UCODE_ADDR, 0);
4263
4264                 /* CE */
4265                 fw_data = (const __be32 *)rdev->ce_fw->data;
4266                 WREG32(CP_CE_UCODE_ADDR, 0);
4267                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4268                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4269                 WREG32(CP_CE_UCODE_ADDR, 0);
4270
4271                 /* ME */
4272                 fw_data = (const __be32 *)rdev->me_fw->data;
4273                 WREG32(CP_ME_RAM_WADDR, 0);
4274                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4275                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4276                 WREG32(CP_ME_RAM_WADDR, 0);
4277         }
4278
4279         WREG32(CP_PFP_UCODE_ADDR, 0);
4280         WREG32(CP_CE_UCODE_ADDR, 0);
4281         WREG32(CP_ME_RAM_WADDR, 0);
4282         WREG32(CP_ME_RAM_RADDR, 0);
4283         return 0;
4284 }
4285
4286 /**
4287  * cik_cp_gfx_start - start the gfx ring
4288  *
4289  * @rdev: radeon_device pointer
4290  *
4291  * Enables the ring and loads the clear state context and other
4292  * packets required to init the ring.
4293  * Returns 0 for success, error for failure.
4294  */
4295 static int cik_cp_gfx_start(struct radeon_device *rdev)
4296 {
4297         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4298         int r, i;
4299
4300         /* init the CP */
4301         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4302         WREG32(CP_ENDIAN_SWAP, 0);
4303         WREG32(CP_DEVICE_ID, 1);
4304
4305         cik_cp_gfx_enable(rdev, true);
4306
4307         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4308         if (r) {
4309                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4310                 return r;
4311         }
4312
4313         /* init the CE partitions.  CE only used for gfx on CIK */
4314         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4315         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4316         radeon_ring_write(ring, 0xc000);
4317         radeon_ring_write(ring, 0xc000);
4318
4319         /* setup clear context state */
4320         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4321         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4322
4323         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4324         radeon_ring_write(ring, 0x80000000);
4325         radeon_ring_write(ring, 0x80000000);
4326
4327         for (i = 0; i < cik_default_size; i++)
4328                 radeon_ring_write(ring, cik_default_state[i]);
4329
4330         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4331         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4332
4333         /* set clear context state */
4334         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4335         radeon_ring_write(ring, 0);
4336
4337         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4338         radeon_ring_write(ring, 0x00000316);
4339         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4340         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4341
4342         radeon_ring_unlock_commit(rdev, ring, false);
4343
4344         return 0;
4345 }
4346
4347 /**
4348  * cik_cp_gfx_fini - stop the gfx ring
4349  *
4350  * @rdev: radeon_device pointer
4351  *
4352  * Stop the gfx ring and tear down the driver ring
4353  * info.
4354  */
4355 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4356 {
4357         cik_cp_gfx_enable(rdev, false);
4358         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4359 }
4360
4361 /**
4362  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4363  *
4364  * @rdev: radeon_device pointer
4365  *
4366  * Program the location and size of the gfx ring buffer
4367  * and test it to make sure it's working.
4368  * Returns 0 for success, error for failure.
4369  */
4370 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4371 {
4372         struct radeon_ring *ring;
4373         u32 tmp;
4374         u32 rb_bufsz;
4375         u64 rb_addr;
4376         int r;
4377
4378         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4379         if (rdev->family != CHIP_HAWAII)
4380                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4381
4382         /* Set the write pointer delay */
4383         WREG32(CP_RB_WPTR_DELAY, 0);
4384
4385         /* set the RB to use vmid 0 */
4386         WREG32(CP_RB_VMID, 0);
4387
4388         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4389
4390         /* ring 0 - compute and gfx */
4391         /* Set ring buffer size */
4392         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4393         rb_bufsz = order_base_2(ring->ring_size / 8);
4394         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4395 #ifdef __BIG_ENDIAN
4396         tmp |= BUF_SWAP_32BIT;
4397 #endif
4398         WREG32(CP_RB0_CNTL, tmp);
4399
4400         /* Initialize the ring buffer's read and write pointers */
4401         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4402         ring->wptr = 0;
4403         WREG32(CP_RB0_WPTR, ring->wptr);
4404
4405         /* set the wb address whether it's enabled or not */
4406         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4407         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4408
4409         /* scratch register shadowing is no longer supported */
4410         WREG32(SCRATCH_UMSK, 0);
4411
4412         if (!rdev->wb.enabled)
4413                 tmp |= RB_NO_UPDATE;
4414
4415         mdelay(1);
4416         WREG32(CP_RB0_CNTL, tmp);
4417
4418         rb_addr = ring->gpu_addr >> 8;
4419         WREG32(CP_RB0_BASE, rb_addr);
4420         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4421
4422         /* start the ring */
4423         cik_cp_gfx_start(rdev);
4424         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4425         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4426         if (r) {
4427                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4428                 return r;
4429         }
4430
4431         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4432                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4433
4434         return 0;
4435 }
4436
4437 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4438                      struct radeon_ring *ring)
4439 {
4440         u32 rptr;
4441
4442         if (rdev->wb.enabled)
4443                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4444         else
4445                 rptr = RREG32(CP_RB0_RPTR);
4446
4447         return rptr;
4448 }
4449
4450 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4451                      struct radeon_ring *ring)
4452 {
4453         u32 wptr;
4454
4455         wptr = RREG32(CP_RB0_WPTR);
4456
4457         return wptr;
4458 }
4459
4460 void cik_gfx_set_wptr(struct radeon_device *rdev,
4461                       struct radeon_ring *ring)
4462 {
4463         WREG32(CP_RB0_WPTR, ring->wptr);
4464         (void)RREG32(CP_RB0_WPTR);
4465 }
4466
4467 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4468                          struct radeon_ring *ring)
4469 {
4470         u32 rptr;
4471
4472         if (rdev->wb.enabled) {
4473                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4474         } else {
4475                 mutex_lock(&rdev->srbm_mutex);
4476                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4477                 rptr = RREG32(CP_HQD_PQ_RPTR);
4478                 cik_srbm_select(rdev, 0, 0, 0, 0);
4479                 mutex_unlock(&rdev->srbm_mutex);
4480         }
4481
4482         return rptr;
4483 }
4484
4485 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4486                          struct radeon_ring *ring)
4487 {
4488         u32 wptr;
4489
4490         if (rdev->wb.enabled) {
4491                 /* XXX check if swapping is necessary on BE */
4492                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4493         } else {
4494                 mutex_lock(&rdev->srbm_mutex);
4495                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4496                 wptr = RREG32(CP_HQD_PQ_WPTR);
4497                 cik_srbm_select(rdev, 0, 0, 0, 0);
4498                 mutex_unlock(&rdev->srbm_mutex);
4499         }
4500
4501         return wptr;
4502 }
4503
4504 void cik_compute_set_wptr(struct radeon_device *rdev,
4505                           struct radeon_ring *ring)
4506 {
4507         /* XXX check if swapping is necessary on BE */
4508         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4509         WDOORBELL32(ring->doorbell_index, ring->wptr);
4510 }
4511
4512 /**
4513  * cik_cp_compute_enable - enable/disable the compute CP MEs
4514  *
4515  * @rdev: radeon_device pointer
4516  * @enable: enable or disable the MEs
4517  *
4518  * Halts or unhalts the compute MEs.
4519  */
4520 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4521 {
4522         if (enable)
4523                 WREG32(CP_MEC_CNTL, 0);
4524         else {
4525                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4526                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4527                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4528         }
4529         udelay(50);
4530 }
4531
4532 /**
4533  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4534  *
4535  * @rdev: radeon_device pointer
4536  *
4537  * Loads the compute MEC1&2 ucode.
4538  * Returns 0 for success, -EINVAL if the ucode is not available.
4539  */
4540 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4541 {
4542         int i;
4543
4544         if (!rdev->mec_fw)
4545                 return -EINVAL;
4546
4547         cik_cp_compute_enable(rdev, false);
4548
4549         if (rdev->new_fw) {
4550                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4551                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4552                 const __le32 *fw_data;
4553                 u32 fw_size;
4554
4555                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4556
4557                 /* MEC1 */
4558                 fw_data = (const __le32 *)
4559                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4560                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4561                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4562                 for (i = 0; i < fw_size; i++)
4563                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4564                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4565
4566                 /* MEC2 */
4567                 if (rdev->family == CHIP_KAVERI) {
4568                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4569                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4570
4571                         fw_data = (const __le32 *)
4572                                 (rdev->mec2_fw->data +
4573                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4574                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4575                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4576                         for (i = 0; i < fw_size; i++)
4577                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4578                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4579                 }
4580         } else {
4581                 const __be32 *fw_data;
4582
4583                 /* MEC1 */
4584                 fw_data = (const __be32 *)rdev->mec_fw->data;
4585                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4586                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4587                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4588                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4589
4590                 if (rdev->family == CHIP_KAVERI) {
4591                         /* MEC2 */
4592                         fw_data = (const __be32 *)rdev->mec_fw->data;
4593                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4594                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4595                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4596                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4597                 }
4598         }
4599
4600         return 0;
4601 }
4602
4603 /**
4604  * cik_cp_compute_start - start the compute queues
4605  *
4606  * @rdev: radeon_device pointer
4607  *
4608  * Enable the compute queues.
4609  * Returns 0 for success, error for failure.
4610  */
4611 static int cik_cp_compute_start(struct radeon_device *rdev)
4612 {
4613         cik_cp_compute_enable(rdev, true);
4614
4615         return 0;
4616 }
4617
4618 /**
4619  * cik_cp_compute_fini - stop the compute queues
4620  *
4621  * @rdev: radeon_device pointer
4622  *
4623  * Stop the compute queues and tear down the driver queue
4624  * info.
4625  */
4626 static void cik_cp_compute_fini(struct radeon_device *rdev)
4627 {
4628         int i, idx, r;
4629
4630         cik_cp_compute_enable(rdev, false);
4631
4632         for (i = 0; i < 2; i++) {
4633                 if (i == 0)
4634                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4635                 else
4636                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4637
4638                 if (rdev->ring[idx].mqd_obj) {
4639                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4640                         if (unlikely(r != 0))
4641                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4642
4643                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4644                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4645
4646                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4647                         rdev->ring[idx].mqd_obj = NULL;
4648                 }
4649         }
4650 }
4651
4652 static void cik_mec_fini(struct radeon_device *rdev)
4653 {
4654         int r;
4655
4656         if (rdev->mec.hpd_eop_obj) {
4657                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4658                 if (unlikely(r != 0))
4659                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4660                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4661                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4662
4663                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4664                 rdev->mec.hpd_eop_obj = NULL;
4665         }
4666 }
4667
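/* HPD EOP buffer size in bytes; cik_mec_init() reserves MEC_HPD_SIZE * 2 per MEC pipe */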
4668 #define MEC_HPD_SIZE 2048
4669
4670 static int cik_mec_init(struct radeon_device *rdev)
4671 {
4672         int r;
4673         u32 *hpd;
4674
4675         /*
4676          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4677          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4678          */
4679         if (rdev->family == CHIP_KAVERI)
4680                 rdev->mec.num_mec = 2;
4681         else
4682                 rdev->mec.num_mec = 1;
4683         rdev->mec.num_pipe = 4;
4684         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4685
4686         if (rdev->mec.hpd_eop_obj == NULL) {
4687                 r = radeon_bo_create(rdev,
4688                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4689                                      PAGE_SIZE, true,
4690                                      RADEON_GEM_DOMAIN_GTT, 0, NULL,
4691                                      &rdev->mec.hpd_eop_obj);
4692                 if (r) {
4693                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4694                         return r;
4695                 }
4696         }
4697
4698         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4699         if (unlikely(r != 0)) {
4700                 cik_mec_fini(rdev);
4701                 return r;
4702         }
4703         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4704                           &rdev->mec.hpd_eop_gpu_addr);
4705         if (r) {
4706                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4707                 cik_mec_fini(rdev);
4708                 return r;
4709         }
4710         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4711         if (r) {
4712                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4713                 cik_mec_fini(rdev);
4714                 return r;
4715         }
4716
4717         /* clear memory.  Not sure if this is required or not */
4718         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4719
4720         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4721         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4722
4723         return 0;
4724 }
4725
4726 struct hqd_registers
4727 {
4728         u32 cp_mqd_base_addr;
4729         u32 cp_mqd_base_addr_hi;
4730         u32 cp_hqd_active;
4731         u32 cp_hqd_vmid;
4732         u32 cp_hqd_persistent_state;
4733         u32 cp_hqd_pipe_priority;
4734         u32 cp_hqd_queue_priority;
4735         u32 cp_hqd_quantum;
4736         u32 cp_hqd_pq_base;
4737         u32 cp_hqd_pq_base_hi;
4738         u32 cp_hqd_pq_rptr;
4739         u32 cp_hqd_pq_rptr_report_addr;
4740         u32 cp_hqd_pq_rptr_report_addr_hi;
4741         u32 cp_hqd_pq_wptr_poll_addr;
4742         u32 cp_hqd_pq_wptr_poll_addr_hi;
4743         u32 cp_hqd_pq_doorbell_control;
4744         u32 cp_hqd_pq_wptr;
4745         u32 cp_hqd_pq_control;
4746         u32 cp_hqd_ib_base_addr;
4747         u32 cp_hqd_ib_base_addr_hi;
4748         u32 cp_hqd_ib_rptr;
4749         u32 cp_hqd_ib_control;
4750         u32 cp_hqd_iq_timer;
4751         u32 cp_hqd_iq_rptr;
4752         u32 cp_hqd_dequeue_request;
4753         u32 cp_hqd_dma_offload;
4754         u32 cp_hqd_sema_cmd;
4755         u32 cp_hqd_msg_type;
4756         u32 cp_hqd_atomic0_preop_lo;
4757         u32 cp_hqd_atomic0_preop_hi;
4758         u32 cp_hqd_atomic1_preop_lo;
4759         u32 cp_hqd_atomic1_preop_hi;
4760         u32 cp_hqd_hq_scheduler0;
4761         u32 cp_hqd_hq_scheduler1;
4762         u32 cp_mqd_control;
4763 };
4764
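/*
 * Memory queue descriptor (MQD) layout handed to the MEC firmware.  The
 * queue_state member mirrors the CP_HQD_ and CP_MQD_ registers programmed
 * in cik_cp_compute_resume() below.
 */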
4765 struct bonaire_mqd
4766 {
4767         u32 header;
4768         u32 dispatch_initiator;
4769         u32 dimensions[3];
4770         u32 start_idx[3];
4771         u32 num_threads[3];
4772         u32 pipeline_stat_enable;
4773         u32 perf_counter_enable;
4774         u32 pgm[2];
4775         u32 tba[2];
4776         u32 tma[2];
4777         u32 pgm_rsrc[2];
4778         u32 vmid;
4779         u32 resource_limits;
4780         u32 static_thread_mgmt01[2];
4781         u32 tmp_ring_size;
4782         u32 static_thread_mgmt23[2];
4783         u32 restart[3];
4784         u32 thread_trace_enable;
4785         u32 reserved1;
4786         u32 user_data[16];
4787         u32 vgtcs_invoke_count[2];
4788         struct hqd_registers queue_state;
4789         u32 dequeue_cntr;
4790         u32 interrupt_queue[64];
4791 };
4792
4793 /**
4794  * cik_cp_compute_resume - setup the compute queue registers
4795  *
4796  * @rdev: radeon_device pointer
4797  *
4798  * Program the compute queues and test them to make sure they
4799  * are working.
4800  * Returns 0 for success, error for failure.
4801  */
4802 static int cik_cp_compute_resume(struct radeon_device *rdev)
4803 {
4804         int r, i, j, idx;
4805         u32 tmp;
4806         bool use_doorbell = true;
4807         u64 hqd_gpu_addr;
4808         u64 mqd_gpu_addr;
4809         u64 eop_gpu_addr;
4810         u64 wb_gpu_addr;
4811         u32 *buf;
4812         struct bonaire_mqd *mqd;
4813
4814         r = cik_cp_compute_start(rdev);
4815         if (r)
4816                 return r;
4817
4818         /* fix up chicken bits */
4819         tmp = RREG32(CP_CPF_DEBUG);
4820         tmp |= (1 << 23);
4821         WREG32(CP_CPF_DEBUG, tmp);
4822
4823         /* init the pipes */
4824         mutex_lock(&rdev->srbm_mutex);
4825         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
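                /* pipes 0-3 are on MEC1; pipes 4-7 (KAVERI only) are on MEC2 */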
4826                 int me = (i < 4) ? 1 : 2;
4827                 int pipe = (i < 4) ? i : (i - 4);
4828
4829                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4830
4831                 cik_srbm_select(rdev, me, pipe, 0, 0);
4832
4833                 /* write the EOP addr */
4834                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4835                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4836
4837                 /* set the VMID assigned */
4838                 WREG32(CP_HPD_EOP_VMID, 0);
4839
4840                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
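                /* e.g. MEC_HPD_SIZE = 2048: order_base_2(2048 / 8) = 8, and 2^(8 + 1) = 512 dwords = 2048 bytes */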
4841                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4842                 tmp &= ~EOP_SIZE_MASK;
4843                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4844                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4845         }
4846         cik_srbm_select(rdev, 0, 0, 0, 0);
4847         mutex_unlock(&rdev->srbm_mutex);
4848
4849         /* init the queues.  Just two for now. */
4850         for (i = 0; i < 2; i++) {
4851                 if (i == 0)
4852                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4853                 else
4854                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4855
4856                 if (rdev->ring[idx].mqd_obj == NULL) {
4857                         r = radeon_bo_create(rdev,
4858                                              sizeof(struct bonaire_mqd),
4859                                              PAGE_SIZE, true,
4860                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4861                                              &rdev->ring[idx].mqd_obj);
4862                         if (r) {
4863                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4864                                 return r;
4865                         }
4866                 }
4867
4868                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4869                 if (unlikely(r != 0)) {
4870                         cik_cp_compute_fini(rdev);
4871                         return r;
4872                 }
4873                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4874                                   &mqd_gpu_addr);
4875                 if (r) {
4876                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4877                         cik_cp_compute_fini(rdev);
4878                         return r;
4879                 }
4880                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4881                 if (r) {
4882                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4883                         cik_cp_compute_fini(rdev);
4884                         return r;
4885                 }
4886
4887                 /* init the mqd struct */
4888                 memset(buf, 0, sizeof(struct bonaire_mqd));
4889
4890                 mqd = (struct bonaire_mqd *)buf;
4891                 mqd->header = 0xC0310800;
4892                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4893                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4894                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4895                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4896
4897                 mutex_lock(&rdev->srbm_mutex);
4898                 cik_srbm_select(rdev, rdev->ring[idx].me,
4899                                 rdev->ring[idx].pipe,
4900                                 rdev->ring[idx].queue, 0);
4901
4902                 /* disable wptr polling */
4903                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4904                 tmp &= ~WPTR_POLL_EN;
4905                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4906
4907                 /* enable doorbell? */
4908                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4909                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4910                 if (use_doorbell)
4911                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4912                 else
4913                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4914                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4915                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4916
4917                 /* disable the queue if it's active */
4918                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4919                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4920                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4921                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4922                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4923                         for (j = 0; j < rdev->usec_timeout; j++) {
4924                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4925                                         break;
4926                                 udelay(1);
4927                         }
4928                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4929                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4930                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4931                 }
4932
4933                 /* set the pointer to the MQD */
4934                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4935                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4936                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4937                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4938                 /* set MQD vmid to 0 */
4939                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4940                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4941                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4942
4943                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4944                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4945                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4946                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4947                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4948                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4949
4950                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4951                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4952                 mqd->queue_state.cp_hqd_pq_control &=
4953                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4954
4955                 mqd->queue_state.cp_hqd_pq_control |=
4956                         order_base_2(rdev->ring[idx].ring_size / 8);
4957                 mqd->queue_state.cp_hqd_pq_control |=
4958                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4959 #ifdef __BIG_ENDIAN
4960                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4961 #endif
4962                 mqd->queue_state.cp_hqd_pq_control &=
4963                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4964                 mqd->queue_state.cp_hqd_pq_control |=
4965                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4966                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4967
4968                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4969                 if (i == 0)
4970                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4971                 else
4972                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4973                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4974                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4975                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4976                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4977                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4978
4979                 /* set the wb address whether it's enabled or not */
4980                 if (i == 0)
4981                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4982                 else
4983                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4984                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4985                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4986                         upper_32_bits(wb_gpu_addr) & 0xffff;
4987                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4988                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4989                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4990                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4991
4992                 /* enable the doorbell if requested */
4993                 if (use_doorbell) {
4994                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4995                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4996                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4997                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4998                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4999                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5000                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5001                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5002
5003                 } else {
5004                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5005                 }
5006                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5007                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5008
5009                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5010                 rdev->ring[idx].wptr = 0;
5011                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5012                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5013                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5014
5015                 /* set the vmid for the queue */
5016                 mqd->queue_state.cp_hqd_vmid = 0;
5017                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5018
5019                 /* activate the queue */
5020                 mqd->queue_state.cp_hqd_active = 1;
5021                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5022
5023                 cik_srbm_select(rdev, 0, 0, 0, 0);
5024                 mutex_unlock(&rdev->srbm_mutex);
5025
5026                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5027                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5028
5029                 rdev->ring[idx].ready = true;
5030                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5031                 if (r)
5032                         rdev->ring[idx].ready = false;
5033         }
5034
5035         return 0;
5036 }
5037
5038 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5039 {
5040         cik_cp_gfx_enable(rdev, enable);
5041         cik_cp_compute_enable(rdev, enable);
5042 }
5043
5044 static int cik_cp_load_microcode(struct radeon_device *rdev)
5045 {
5046         int r;
5047
5048         r = cik_cp_gfx_load_microcode(rdev);
5049         if (r)
5050                 return r;
5051         r = cik_cp_compute_load_microcode(rdev);
5052         if (r)
5053                 return r;
5054
5055         return 0;
5056 }
5057
5058 static void cik_cp_fini(struct radeon_device *rdev)
5059 {
5060         cik_cp_gfx_fini(rdev);
5061         cik_cp_compute_fini(rdev);
5062 }
5063
5064 static int cik_cp_resume(struct radeon_device *rdev)
5065 {
5066         int r;
5067
5068         cik_enable_gui_idle_interrupt(rdev, false);
5069
5070         r = cik_cp_load_microcode(rdev);
5071         if (r)
5072                 return r;
5073
5074         r = cik_cp_gfx_resume(rdev);
5075         if (r)
5076                 return r;
5077         r = cik_cp_compute_resume(rdev);
5078         if (r)
5079                 return r;
5080
5081         cik_enable_gui_idle_interrupt(rdev, true);
5082
5083         return 0;
5084 }
5085
5086 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5087 {
5088         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5089                 RREG32(GRBM_STATUS));
5090         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5091                 RREG32(GRBM_STATUS2));
5092         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5093                 RREG32(GRBM_STATUS_SE0));
5094         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5095                 RREG32(GRBM_STATUS_SE1));
5096         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5097                 RREG32(GRBM_STATUS_SE2));
5098         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5099                 RREG32(GRBM_STATUS_SE3));
5100         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5101                 RREG32(SRBM_STATUS));
5102         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5103                 RREG32(SRBM_STATUS2));
5104         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5105                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5106         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5107                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5108         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5109         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5110                  RREG32(CP_STALLED_STAT1));
5111         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5112                  RREG32(CP_STALLED_STAT2));
5113         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5114                  RREG32(CP_STALLED_STAT3));
5115         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5116                  RREG32(CP_CPF_BUSY_STAT));
5117         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5118                  RREG32(CP_CPF_STALLED_STAT1));
5119         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5120         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5121         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5122                  RREG32(CP_CPC_STALLED_STAT1));
5123         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5124 }
5125
5126 /**
5127  * cik_gpu_check_soft_reset - check which blocks are busy
5128  *
5129  * @rdev: radeon_device pointer
5130  *
5131  * Check which blocks are busy and return the relevant reset
5132  * mask to be used by cik_gpu_soft_reset().
5133  * Returns a mask of the blocks to be reset.
5134  */
5135 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5136 {
5137         u32 reset_mask = 0;
5138         u32 tmp;
5139
5140         /* GRBM_STATUS */
5141         tmp = RREG32(GRBM_STATUS);
5142         if (tmp & (PA_BUSY | SC_BUSY |
5143                    BCI_BUSY | SX_BUSY |
5144                    TA_BUSY | VGT_BUSY |
5145                    DB_BUSY | CB_BUSY |
5146                    GDS_BUSY | SPI_BUSY |
5147                    IA_BUSY | IA_BUSY_NO_DMA))
5148                 reset_mask |= RADEON_RESET_GFX;
5149
5150         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5151                 reset_mask |= RADEON_RESET_CP;
5152
5153         /* GRBM_STATUS2 */
5154         tmp = RREG32(GRBM_STATUS2);
5155         if (tmp & RLC_BUSY)
5156                 reset_mask |= RADEON_RESET_RLC;
5157
5158         /* SDMA0_STATUS_REG */
5159         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5160         if (!(tmp & SDMA_IDLE))
5161                 reset_mask |= RADEON_RESET_DMA;
5162
5163         /* SDMA1_STATUS_REG */
5164         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5165         if (!(tmp & SDMA_IDLE))
5166                 reset_mask |= RADEON_RESET_DMA1;
5167
5168         /* SRBM_STATUS2 */
5169         tmp = RREG32(SRBM_STATUS2);
5170         if (tmp & SDMA_BUSY)
5171                 reset_mask |= RADEON_RESET_DMA;
5172
5173         if (tmp & SDMA1_BUSY)
5174                 reset_mask |= RADEON_RESET_DMA1;
5175
5176         /* SRBM_STATUS */
5177         tmp = RREG32(SRBM_STATUS);
5178
5179         if (tmp & IH_BUSY)
5180                 reset_mask |= RADEON_RESET_IH;
5181
5182         if (tmp & SEM_BUSY)
5183                 reset_mask |= RADEON_RESET_SEM;
5184
5185         if (tmp & GRBM_RQ_PENDING)
5186                 reset_mask |= RADEON_RESET_GRBM;
5187
5188         if (tmp & VMC_BUSY)
5189                 reset_mask |= RADEON_RESET_VMC;
5190
5191         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5192                    MCC_BUSY | MCD_BUSY))
5193                 reset_mask |= RADEON_RESET_MC;
5194
5195         if (evergreen_is_display_hung(rdev))
5196                 reset_mask |= RADEON_RESET_DISPLAY;
5197
5198         /* Skip MC reset as it's most likely not hung, just busy */
5199         if (reset_mask & RADEON_RESET_MC) {
5200                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5201                 reset_mask &= ~RADEON_RESET_MC;
5202         }
5203
5204         return reset_mask;
5205 }
5206
5207 /**
5208  * cik_gpu_soft_reset - soft reset GPU
5209  *
5210  * @rdev: radeon_device pointer
5211  * @reset_mask: mask of which blocks to reset
5212  *
5213  * Soft reset the blocks specified in @reset_mask.
5214  */
5215 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5216 {
5217         struct evergreen_mc_save save;
5218         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5219         u32 tmp;
5220
5221         if (reset_mask == 0)
5222                 return;
5223
5224         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5225
5226         cik_print_gpu_status_regs(rdev);
5227         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5228                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5229         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5230                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5231
5232         /* disable CG/PG */
5233         cik_fini_pg(rdev);
5234         cik_fini_cg(rdev);
5235
5236         /* stop the rlc */
5237         cik_rlc_stop(rdev);
5238
5239         /* Disable GFX parsing/prefetching */
5240         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5241
5242         /* Disable MEC parsing/prefetching */
5243         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5244
5245         if (reset_mask & RADEON_RESET_DMA) {
5246                 /* sdma0 */
5247                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5248                 tmp |= SDMA_HALT;
5249                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5250         }
5251         if (reset_mask & RADEON_RESET_DMA1) {
5252                 /* sdma1 */
5253                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5254                 tmp |= SDMA_HALT;
5255                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5256         }
5257
5258         evergreen_mc_stop(rdev, &save);
5259         if (evergreen_mc_wait_for_idle(rdev)) {
5260                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5261         }
5262
5263         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5264                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5265
5266         if (reset_mask & RADEON_RESET_CP) {
5267                 grbm_soft_reset |= SOFT_RESET_CP;
5268
5269                 srbm_soft_reset |= SOFT_RESET_GRBM;
5270         }
5271
5272         if (reset_mask & RADEON_RESET_DMA)
5273                 srbm_soft_reset |= SOFT_RESET_SDMA;
5274
5275         if (reset_mask & RADEON_RESET_DMA1)
5276                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5277
5278         if (reset_mask & RADEON_RESET_DISPLAY)
5279                 srbm_soft_reset |= SOFT_RESET_DC;
5280
5281         if (reset_mask & RADEON_RESET_RLC)
5282                 grbm_soft_reset |= SOFT_RESET_RLC;
5283
5284         if (reset_mask & RADEON_RESET_SEM)
5285                 srbm_soft_reset |= SOFT_RESET_SEM;
5286
5287         if (reset_mask & RADEON_RESET_IH)
5288                 srbm_soft_reset |= SOFT_RESET_IH;
5289
5290         if (reset_mask & RADEON_RESET_GRBM)
5291                 srbm_soft_reset |= SOFT_RESET_GRBM;
5292
5293         if (reset_mask & RADEON_RESET_VMC)
5294                 srbm_soft_reset |= SOFT_RESET_VMC;
5295
5296         if (!(rdev->flags & RADEON_IS_IGP)) {
5297                 if (reset_mask & RADEON_RESET_MC)
5298                         srbm_soft_reset |= SOFT_RESET_MC;
5299         }
5300
5301         if (grbm_soft_reset) {
5302                 tmp = RREG32(GRBM_SOFT_RESET);
5303                 tmp |= grbm_soft_reset;
5304                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5305                 WREG32(GRBM_SOFT_RESET, tmp);
5306                 tmp = RREG32(GRBM_SOFT_RESET);
5307
5308                 udelay(50);
5309
5310                 tmp &= ~grbm_soft_reset;
5311                 WREG32(GRBM_SOFT_RESET, tmp);
5312                 tmp = RREG32(GRBM_SOFT_RESET);
5313         }
5314
5315         if (srbm_soft_reset) {
5316                 tmp = RREG32(SRBM_SOFT_RESET);
5317                 tmp |= srbm_soft_reset;
5318                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5319                 WREG32(SRBM_SOFT_RESET, tmp);
5320                 tmp = RREG32(SRBM_SOFT_RESET);
5321
5322                 udelay(50);
5323
5324                 tmp &= ~srbm_soft_reset;
5325                 WREG32(SRBM_SOFT_RESET, tmp);
5326                 tmp = RREG32(SRBM_SOFT_RESET);
5327         }
5328
5329         /* Wait a little for things to settle down */
5330         udelay(50);
5331
5332         evergreen_mc_resume(rdev, &save);
5333         udelay(50);
5334
5335         cik_print_gpu_status_regs(rdev);
5336 }
5337
5338 struct kv_reset_save_regs {
5339         u32 gmcon_reng_execute;
5340         u32 gmcon_misc;
5341         u32 gmcon_misc3;
5342 };
5343
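/* on APUs the driver saves and restores the GMCON state around a pci config reset */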
5344 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5345                                    struct kv_reset_save_regs *save)
5346 {
5347         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5348         save->gmcon_misc = RREG32(GMCON_MISC);
5349         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5350
5351         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5352         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5353                                                 STCTRL_STUTTER_EN));
5354 }
5355
5356 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5357                                       struct kv_reset_save_regs *save)
5358 {
5359         int i;
5360
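        /* step the GMCON power-gating FSM through its restore sequence; the
         * config/data values below appear to be hardware-specified magic numbers
         */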
5361         WREG32(GMCON_PGFSM_WRITE, 0);
5362         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5363
5364         for (i = 0; i < 5; i++)
5365                 WREG32(GMCON_PGFSM_WRITE, 0);
5366
5367         WREG32(GMCON_PGFSM_WRITE, 0);
5368         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5369
5370         for (i = 0; i < 5; i++)
5371                 WREG32(GMCON_PGFSM_WRITE, 0);
5372
5373         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5374         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5375
5376         for (i = 0; i < 5; i++)
5377                 WREG32(GMCON_PGFSM_WRITE, 0);
5378
5379         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5380         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5381
5382         for (i = 0; i < 5; i++)
5383                 WREG32(GMCON_PGFSM_WRITE, 0);
5384
5385         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5386         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5387
5388         for (i = 0; i < 5; i++)
5389                 WREG32(GMCON_PGFSM_WRITE, 0);
5390
5391         WREG32(GMCON_PGFSM_WRITE, 0);
5392         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5393
5394         for (i = 0; i < 5; i++)
5395                 WREG32(GMCON_PGFSM_WRITE, 0);
5396
5397         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5398         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5399
5400         for (i = 0; i < 5; i++)
5401                 WREG32(GMCON_PGFSM_WRITE, 0);
5402
5403         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5404         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5405
5406         for (i = 0; i < 5; i++)
5407                 WREG32(GMCON_PGFSM_WRITE, 0);
5408
5409         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5410         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5411
5412         for (i = 0; i < 5; i++)
5413                 WREG32(GMCON_PGFSM_WRITE, 0);
5414
5415         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5416         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5417
5418         for (i = 0; i < 5; i++)
5419                 WREG32(GMCON_PGFSM_WRITE, 0);
5420
5421         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5422         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5423
5424         WREG32(GMCON_MISC3, save->gmcon_misc3);
5425         WREG32(GMCON_MISC, save->gmcon_misc);
5426         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5427 }
5428
5429 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5430 {
5431         struct evergreen_mc_save save;
5432         struct kv_reset_save_regs kv_save = { 0 };
5433         u32 tmp, i;
5434
5435         dev_info(rdev->dev, "GPU pci config reset\n");
5436
5437         /* disable dpm? */
5438
5439         /* disable cg/pg */
5440         cik_fini_pg(rdev);
5441         cik_fini_cg(rdev);
5442
5443         /* Disable GFX parsing/prefetching */
5444         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5445
5446         /* Disable MEC parsing/prefetching */
5447         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5448
5449         /* sdma0 */
5450         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5451         tmp |= SDMA_HALT;
5452         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5453         /* sdma1 */
5454         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5455         tmp |= SDMA_HALT;
5456         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5457         /* XXX other engines? */
5458
5459         /* halt the rlc, disable cp internal ints */
5460         cik_rlc_stop(rdev);
5461
5462         udelay(50);
5463
5464         /* disable mem access */
5465         evergreen_mc_stop(rdev, &save);
5466         if (evergreen_mc_wait_for_idle(rdev)) {
5467                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5468         }
5469
5470         if (rdev->flags & RADEON_IS_IGP)
5471                 kv_save_regs_for_reset(rdev, &kv_save);
5472
5473         /* disable BM */
5474         pci_clear_master(rdev->pdev);
5475         /* reset */
5476         radeon_pci_config_reset(rdev);
5477
5478         udelay(100);
5479
5480         /* wait for asic to come out of reset */
5481         for (i = 0; i < rdev->usec_timeout; i++) {
5482                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5483                         break;
5484                 udelay(1);
5485         }
5486
5487         /* does asic init need to be run first??? */
5488         if (rdev->flags & RADEON_IS_IGP)
5489                 kv_restore_regs_for_reset(rdev, &kv_save);
5490 }
5491
5492 /**
5493  * cik_asic_reset - soft reset GPU
5494  *
5495  * @rdev: radeon_device pointer
5496  *
5497  * Look up which blocks are hung and attempt
5498  * to reset them.
5499  * Returns 0 for success.
5500  */
5501 int cik_asic_reset(struct radeon_device *rdev)
5502 {
5503         u32 reset_mask;
5504
5505         reset_mask = cik_gpu_check_soft_reset(rdev);
5506
5507         if (reset_mask)
5508                 r600_set_bios_scratch_engine_hung(rdev, true);
5509
5510         /* try soft reset */
5511         cik_gpu_soft_reset(rdev, reset_mask);
5512
5513         reset_mask = cik_gpu_check_soft_reset(rdev);
5514
5515         /* try pci config reset */
5516         if (reset_mask && radeon_hard_reset)
5517                 cik_gpu_pci_config_reset(rdev);
5518
5519         reset_mask = cik_gpu_check_soft_reset(rdev);
5520
5521         if (!reset_mask)
5522                 r600_set_bios_scratch_engine_hung(rdev, false);
5523
5524         return 0;
5525 }
5526
5527 /**
5528  * cik_gfx_is_lockup - check if the 3D engine is locked up
5529  *
5530  * @rdev: radeon_device pointer
5531  * @ring: radeon_ring structure holding ring information
5532  *
5533  * Check if the 3D engine is locked up (CIK).
5534  * Returns true if the engine is locked, false if not.
5535  */
5536 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5537 {
5538         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5539
5540         if (!(reset_mask & (RADEON_RESET_GFX |
5541                             RADEON_RESET_COMPUTE |
5542                             RADEON_RESET_CP))) {
5543                 radeon_ring_lockup_update(rdev, ring);
5544                 return false;
5545         }
5546         return radeon_ring_test_lockup(rdev, ring);
5547 }
5548
5549 /* MC */
5550 /**
5551  * cik_mc_program - program the GPU memory controller
5552  *
5553  * @rdev: radeon_device pointer
5554  *
5555  * Set the location of vram, gart, and AGP in the GPU's
5556  * physical address space (CIK).
5557  */
5558 static void cik_mc_program(struct radeon_device *rdev)
5559 {
5560         struct evergreen_mc_save save;
5561         u32 tmp;
5562         int i, j;
5563
5564         /* Initialize HDP */
5565         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5566                 WREG32((0x2c14 + j), 0x00000000);
5567                 WREG32((0x2c18 + j), 0x00000000);
5568                 WREG32((0x2c1c + j), 0x00000000);
5569                 WREG32((0x2c20 + j), 0x00000000);
5570                 WREG32((0x2c24 + j), 0x00000000);
5571         }
5572         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5573
5574         evergreen_mc_stop(rdev, &save);
5575         if (radeon_mc_wait_for_idle(rdev)) {
5576                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5577         }
5578         /* Lock out access through the VGA aperture */
5579         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5580         /* Update configuration */
5581         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5582                rdev->mc.vram_start >> 12);
5583         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5584                rdev->mc.vram_end >> 12);
5585         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5586                rdev->vram_scratch.gpu_addr >> 12);
5587         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5588         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5589         WREG32(MC_VM_FB_LOCATION, tmp);
5590         /* XXX double check these! */
5591         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5592         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5593         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5594         WREG32(MC_VM_AGP_BASE, 0);
5595         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5596         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5597         if (radeon_mc_wait_for_idle(rdev)) {
5598                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5599         }
5600         evergreen_mc_resume(rdev, &save);
5601         /* we need to own VRAM, so turn off the VGA renderer here
5602          * to stop it overwriting our objects */
5603         rv515_vga_render_disable(rdev);
5604 }
5605
5606 /**
5607  * cik_mc_init - initialize the memory controller driver params
5608  *
5609  * @rdev: radeon_device pointer
5610  *
5611  * Look up the amount of vram, vram width, and decide how to place
5612  * vram and gart within the GPU's physical address space (CIK).
5613  * Returns 0 for success.
5614  */
5615 static int cik_mc_init(struct radeon_device *rdev)
5616 {
5617         u32 tmp;
5618         int chansize, numchan;
5619
5620         /* Get VRAM information */
5621         rdev->mc.vram_is_ddr = true;
5622         tmp = RREG32(MC_ARB_RAMCFG);
5623         if (tmp & CHANSIZE_MASK) {
5624                 chansize = 64;
5625         } else {
5626                 chansize = 32;
5627         }
5628         tmp = RREG32(MC_SHARED_CHMAP);
5629         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5630         case 0:
5631         default:
5632                 numchan = 1;
5633                 break;
5634         case 1:
5635                 numchan = 2;
5636                 break;
5637         case 2:
5638                 numchan = 4;
5639                 break;
5640         case 3:
5641                 numchan = 8;
5642                 break;
5643         case 4:
5644                 numchan = 3;
5645                 break;
5646         case 5:
5647                 numchan = 6;
5648                 break;
5649         case 6:
5650                 numchan = 10;
5651                 break;
5652         case 7:
5653                 numchan = 12;
5654                 break;
5655         case 8:
5656                 numchan = 16;
5657                 break;
5658         }
5659         rdev->mc.vram_width = numchan * chansize;
5660         /* Could aperture size report 0? */
5661         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5662         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5663         /* CONFIG_MEMSIZE reports the VRAM size in MB */
5664         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5665         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5666         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5667         si_vram_gtt_location(rdev, &rdev->mc);
5668         radeon_update_bandwidth_info(rdev);
5669
5670         return 0;
5671 }
5672
5673 /*
5674  * GART
5675  * VMID 0 is used for the physical GPU addresses as seen by the kernel.
5676  * VMIDs 1-15 are used for userspace clients and are handled
5677  * by the radeon vm/hsa code.
5678  */
5679 /**
5680  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5681  *
5682  * @rdev: radeon_device pointer
5683  *
5684  * Flush the TLB for the VMID 0 page table (CIK).
5685  */
5686 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5687 {
5688         /* flush hdp cache */
5689         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5690
5691         /* bits 0-15 are the VM contexts0-15 */
5692         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5693 }
5694
5695 /**
5696  * cik_pcie_gart_enable - gart enable
5697  *
5698  * @rdev: radeon_device pointer
5699  *
5700  * This sets up the TLBs, programs the page tables for VMID0,
5701  * sets up the hw for VMIDs 1-15 which are allocated on
5702  * demand, and sets up the global locations for the LDS, GDS,
5703  * and GPUVM for FSA64 clients (CIK).
5704  * Returns 0 for success, errors for failure.
5705  */
5706 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5707 {
5708         int r, i;
5709
5710         if (rdev->gart.robj == NULL) {
5711                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5712                 return -EINVAL;
5713         }
5714         r = radeon_gart_table_vram_pin(rdev);
5715         if (r)
5716                 return r;
5717         /* Setup TLB control */
5718         WREG32(MC_VM_MX_L1_TLB_CNTL,
5719                (0xA << 7) |
5720                ENABLE_L1_TLB |
5721                ENABLE_L1_FRAGMENT_PROCESSING |
5722                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5723                ENABLE_ADVANCED_DRIVER_MODEL |
5724                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5725         /* Setup L2 cache */
5726         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5727                ENABLE_L2_FRAGMENT_PROCESSING |
5728                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5729                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5730                EFFECTIVE_L2_QUEUE_SIZE(7) |
5731                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5732         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5733         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5734                BANK_SELECT(4) |
5735                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5736         /* setup context0 */
5737         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5738         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5739         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5740         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5741                         (u32)(rdev->dummy_page.addr >> 12));
5742         WREG32(VM_CONTEXT0_CNTL2, 0);
5743         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5744                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5745
5746         WREG32(0x15D4, 0);
5747         WREG32(0x15D8, 0);
5748         WREG32(0x15DC, 0);
5749
5750         /* empty context1-15 */
5751         /* FIXME: start with 4GB; once 2-level page tables are in use,
5752          * switch to the full VM size space
5753          */
5754         /* set vm size, must be a multiple of 4 */
5755         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5756         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
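        /* point VMIDs 1-15 at the VMID0 page table for now; cik_vm_flush()
         * rebinds them to per-VM page directories at flush time
         */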
5757         for (i = 1; i < 16; i++) {
5758                 if (i < 8)
5759                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5760                                rdev->gart.table_addr >> 12);
5761                 else
5762                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5763                                rdev->gart.table_addr >> 12);
5764         }
5765
5766         /* enable context1-15 */
5767         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5768                (u32)(rdev->dummy_page.addr >> 12));
5769         WREG32(VM_CONTEXT1_CNTL2, 4);
5770         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5771                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5772                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5773                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5774                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5775                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5776                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5777                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5778                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5779                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5780                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5781                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5782                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5783                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5784
5785         if (rdev->family == CHIP_KAVERI) {
5786                 u32 tmp = RREG32(CHUB_CONTROL);
5787                 tmp &= ~BYPASS_VM;
5788                 WREG32(CHUB_CONTROL, tmp);
5789         }
5790
5791         /* XXX SH_MEM regs */
5792         /* where to put LDS, scratch, GPUVM in FSA64 space */
5793         mutex_lock(&rdev->srbm_mutex);
5794         for (i = 0; i < 16; i++) {
5795                 cik_srbm_select(rdev, 0, 0, 0, i);
5796                 /* CP and shaders */
5797                 WREG32(SH_MEM_CONFIG, 0);
5798                 WREG32(SH_MEM_APE1_BASE, 1);
5799                 WREG32(SH_MEM_APE1_LIMIT, 0);
5800                 WREG32(SH_MEM_BASES, 0);
5801                 /* SDMA GFX */
5802                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5803                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5804                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5805                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5806                 /* XXX SDMA RLC - todo */
5807         }
5808         cik_srbm_select(rdev, 0, 0, 0, 0);
5809         mutex_unlock(&rdev->srbm_mutex);
5810
5811         cik_pcie_gart_tlb_flush(rdev);
5812         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5813                  (unsigned)(rdev->mc.gtt_size >> 20),
5814                  (unsigned long long)rdev->gart.table_addr);
5815         rdev->gart.ready = true;
5816         return 0;
5817 }
5818
5819 /**
5820  * cik_pcie_gart_disable - gart disable
5821  *
5822  * @rdev: radeon_device pointer
5823  *
5824  * This disables all VM page tables (CIK).
5825  */
5826 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5827 {
5828         /* Disable all tables */
5829         WREG32(VM_CONTEXT0_CNTL, 0);
5830         WREG32(VM_CONTEXT1_CNTL, 0);
5831         /* Setup TLB control */
5832         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5833                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5834         /* Setup L2 cache */
5835         WREG32(VM_L2_CNTL,
5836                ENABLE_L2_FRAGMENT_PROCESSING |
5837                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5838                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5839                EFFECTIVE_L2_QUEUE_SIZE(7) |
5840                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5841         WREG32(VM_L2_CNTL2, 0);
5842         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5843                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5844         radeon_gart_table_vram_unpin(rdev);
5845 }
5846
5847 /**
5848  * cik_pcie_gart_fini - vm fini callback
5849  *
5850  * @rdev: radeon_device pointer
5851  *
5852  * Tears down the driver GART/VM setup (CIK).
5853  */
5854 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5855 {
5856         cik_pcie_gart_disable(rdev);
5857         radeon_gart_table_vram_free(rdev);
5858         radeon_gart_fini(rdev);
5859 }
5860
5861 /* vm parser */
5862 /**
5863  * cik_ib_parse - vm ib_parse callback
5864  *
5865  * @rdev: radeon_device pointer
5866  * @ib: indirect buffer pointer
5867  *
5868  * CIK uses hw IB checking so this is a nop (CIK).
5869  */
5870 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5871 {
5872         return 0;
5873 }
5874
5875 /*
5876  * vm
5877  * VMID 0 is used for the physical GPU addresses as seen by the kernel.
5878  * VMIDs 1-15 are used for userspace clients and are handled
5879  * by the radeon vm/hsa code.
5880  */
5881 /**
5882  * cik_vm_init - cik vm init callback
5883  *
5884  * @rdev: radeon_device pointer
5885  *
5886  * Inits CIK-specific VM parameters (number of VMs, base of vram for
5887  * VMIDs 1-15) (CIK).
5888  * Returns 0 for success.
5889  */
5890 int cik_vm_init(struct radeon_device *rdev)
5891 {
5892         /* number of VMs */
5893         rdev->vm_manager.nvm = 16;
5894         /* base offset of vram pages */
5895         if (rdev->flags & RADEON_IS_IGP) {
5896                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5897                 tmp <<= 22;
5898                 rdev->vm_manager.vram_base_offset = tmp;
5899         } else
5900                 rdev->vm_manager.vram_base_offset = 0;
5901
5902         return 0;
5903 }
5904
5905 /**
5906  * cik_vm_fini - cik vm fini callback
5907  *
5908  * @rdev: radeon_device pointer
5909  *
5910  * Tear down any asic specific VM setup (CIK).
5911  */
5912 void cik_vm_fini(struct radeon_device *rdev)
5913 {
5914 }
5915
5916 /**
5917  * cik_vm_decode_fault - print human readable fault info
5918  *
5919  * @rdev: radeon_device pointer
5920  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5921  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5922  *
5923  * Print human readable fault information (CIK).
5924  */
5925 static void cik_vm_decode_fault(struct radeon_device *rdev,
5926                                 u32 status, u32 addr, u32 mc_client)
5927 {
5928         u32 mc_id;
5929         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5930         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5931         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5932                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5933
5934         if (rdev->family == CHIP_HAWAII)
5935                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5936         else
5937                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5938
5939         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5940                protections, vmid, addr,
5941                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5942                block, mc_client, mc_id);
5943 }
5944
5945 /**
5946  * cik_vm_flush - cik vm flush using the CP
5947  *
5948  * @rdev: radeon_device pointer
 * @ridx: index of the ring doing the flush
 * @vm: radeon_vm pointer for the VM being flushed
5949  *
5950  * Update the page table base and flush the VM TLB
5951  * using the CP (CIK).
5952  */
5953 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5954 {
5955         struct radeon_ring *ring = &rdev->ring[ridx];
5956         int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
5957
5958         if (vm == NULL)
5959                 return;
5960
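        /* update the page table base address for this VMID */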
5961         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5962         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5963                                  WRITE_DATA_DST_SEL(0)));
5964         if (vm->id < 8) {
5965                 radeon_ring_write(ring,
5966                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5967         } else {
5968                 radeon_ring_write(ring,
5969                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5970         }
5971         radeon_ring_write(ring, 0);
5972         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5973
5974         /* update SH_MEM_* regs */
5975         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5976         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5977                                  WRITE_DATA_DST_SEL(0)));
5978         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5979         radeon_ring_write(ring, 0);
5980         radeon_ring_write(ring, VMID(vm->id));
5981
5982         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5983         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5984                                  WRITE_DATA_DST_SEL(0)));
5985         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5986         radeon_ring_write(ring, 0);
5987
5988         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5989         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5990         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5991         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5992
5993         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5994         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5995                                  WRITE_DATA_DST_SEL(0)));
5996         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5997         radeon_ring_write(ring, 0);
5998         radeon_ring_write(ring, VMID(0));
5999
6000         /* HDP flush */
6001         cik_hdp_flush_cp_ring_emit(rdev, ridx);
6002
6003         /* bits 0-15 are the VM contexts0-15 */
6004         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6005         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6006                                  WRITE_DATA_DST_SEL(0)));
6007         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6008         radeon_ring_write(ring, 0);
6009         radeon_ring_write(ring, 1 << vm->id);
6010
6011         /* compute doesn't have PFP */
6012         if (usepfp) {
6013                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6014                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6015                 radeon_ring_write(ring, 0x0);
6016         }
6017 }
6018
6019 /*
6020  * RLC
6021  * The RLC is a multi-purpose microengine that handles a
6022  * variety of functions, the most important of which is
6023  * the interrupt controller.
6024  */
6025 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6026                                           bool enable)
6027 {
6028         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6029
6030         if (enable)
6031                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6032         else
6033                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6034         WREG32(CP_INT_CNTL_RING0, tmp);
6035 }
6036
6037 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6038 {
6039         u32 tmp;
6040
6041         tmp = RREG32(RLC_LB_CNTL);
6042         if (enable)
6043                 tmp |= LOAD_BALANCE_ENABLE;
6044         else
6045                 tmp &= ~LOAD_BALANCE_ENABLE;
6046         WREG32(RLC_LB_CNTL, tmp);
6047 }
6048
6049 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6050 {
6051         u32 i, j, k;
6052         u32 mask;
6053
6054         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6055                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6056                         cik_select_se_sh(rdev, i, j);
6057                         for (k = 0; k < rdev->usec_timeout; k++) {
6058                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6059                                         break;
6060                                 udelay(1);
6061                         }
6062                 }
6063         }
6064         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6065
6066         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6067         for (k = 0; k < rdev->usec_timeout; k++) {
6068                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6069                         break;
6070                 udelay(1);
6071         }
6072 }
6073
6074 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6075 {
6076         u32 tmp;
6077
6078         tmp = RREG32(RLC_CNTL);
6079         if (tmp != rlc)
6080                 WREG32(RLC_CNTL, rlc);
6081 }
6082
6083 static u32 cik_halt_rlc(struct radeon_device *rdev)
6084 {
6085         u32 data, orig;
6086
6087         orig = data = RREG32(RLC_CNTL);
6088
6089         if (data & RLC_ENABLE) {
6090                 u32 i;
6091
6092                 data &= ~RLC_ENABLE;
6093                 WREG32(RLC_CNTL, data);
6094
6095                 for (i = 0; i < rdev->usec_timeout; i++) {
6096                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6097                                 break;
6098                         udelay(1);
6099                 }
6100
6101                 cik_wait_for_rlc_serdes(rdev);
6102         }
6103
6104         return orig;
6105 }
6106
6107 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6108 {
6109         u32 tmp, i, mask;
6110
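        /* request safe mode from the RLC, then wait for gfx clocks/power to be
         * up and for the request to be acknowledged
         */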
6111         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6112         WREG32(RLC_GPR_REG2, tmp);
6113
6114         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6115         for (i = 0; i < rdev->usec_timeout; i++) {
6116                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6117                         break;
6118                 udelay(1);
6119         }
6120
6121         for (i = 0; i < rdev->usec_timeout; i++) {
6122                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6123                         break;
6124                 udelay(1);
6125         }
6126 }
6127
6128 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6129 {
6130         u32 tmp;
6131
6132         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6133         WREG32(RLC_GPR_REG2, tmp);
6134 }
6135
6136 /**
6137  * cik_rlc_stop - stop the RLC ME
6138  *
6139  * @rdev: radeon_device pointer
6140  *
6141  * Halt the RLC ME (MicroEngine) (CIK).
6142  */
6143 static void cik_rlc_stop(struct radeon_device *rdev)
6144 {
6145         WREG32(RLC_CNTL, 0);
6146
6147         cik_enable_gui_idle_interrupt(rdev, false);
6148
6149         cik_wait_for_rlc_serdes(rdev);
6150 }
6151
6152 /**
6153  * cik_rlc_start - start the RLC ME
6154  *
6155  * @rdev: radeon_device pointer
6156  *
6157  * Unhalt the RLC ME (MicroEngine) (CIK).
6158  */
6159 static void cik_rlc_start(struct radeon_device *rdev)
6160 {
6161         WREG32(RLC_CNTL, RLC_ENABLE);
6162
6163         cik_enable_gui_idle_interrupt(rdev, true);
6164
6165         udelay(50);
6166 }
6167
6168 /**
6169  * cik_rlc_resume - setup the RLC hw
6170  *
6171  * @rdev: radeon_device pointer
6172  *
6173  * Initialize the RLC registers, load the ucode,
6174  * and start the RLC (CIK).
6175  * Returns 0 for success, -EINVAL if the ucode is not available.
6176  */
6177 static int cik_rlc_resume(struct radeon_device *rdev)
6178 {
6179         u32 i, size, tmp;
6180
6181         if (!rdev->rlc_fw)
6182                 return -EINVAL;
6183
6184         cik_rlc_stop(rdev);
6185
6186         /* disable CG */
6187         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6188         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6189
6190         si_rlc_reset(rdev);
6191
6192         cik_init_pg(rdev);
6193
6194         cik_init_cg(rdev);
6195
6196         WREG32(RLC_LB_CNTR_INIT, 0);
6197         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6198
6199         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6200         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6201         WREG32(RLC_LB_PARAMS, 0x00600408);
6202         WREG32(RLC_LB_CNTL, 0x80000004);
6203
6204         WREG32(RLC_MC_CNTL, 0);
6205         WREG32(RLC_UCODE_CNTL, 0);
6206
6207         if (rdev->new_fw) {
6208                 const struct rlc_firmware_header_v1_0 *hdr =
6209                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6210                 const __le32 *fw_data = (const __le32 *)
6211                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6212
6213                 radeon_ucode_print_rlc_hdr(&hdr->header);
6214
6215                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6216                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6217                 for (i = 0; i < size; i++)
6218                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6219                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6220         } else {
6221                 const __be32 *fw_data;
6222
6223                 switch (rdev->family) {
6224                 case CHIP_BONAIRE:
6225                 case CHIP_HAWAII:
6226                 default:
6227                         size = BONAIRE_RLC_UCODE_SIZE;
6228                         break;
6229                 case CHIP_KAVERI:
6230                         size = KV_RLC_UCODE_SIZE;
6231                         break;
6232                 case CHIP_KABINI:
6233                         size = KB_RLC_UCODE_SIZE;
6234                         break;
6235                 case CHIP_MULLINS:
6236                         size = ML_RLC_UCODE_SIZE;
6237                         break;
6238                 }
6239
6240                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6241                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6242                 for (i = 0; i < size; i++)
6243                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6244                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6245         }
6246
6247         /* XXX - find out what chips support lbpw */
6248         cik_enable_lbpw(rdev, false);
6249
6250         if (rdev->family == CHIP_BONAIRE)
6251                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6252
6253         cik_rlc_start(rdev);
6254
6255         return 0;
6256 }
6257
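/**
 * cik_enable_cgcg - enable/disable coarse grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable CGCG
 *
 * Enable coarse grain clock gating (CGCG) and coarse grain light
 * sleep (CGLS) for the GFX block if requested and supported by the
 * cg_flags, otherwise disable them.  The RLC is halted around the
 * serdes programming and then restored (CIK).
 */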
6258 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6259 {
6260         u32 data, orig, tmp, tmp2;
6261
6262         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6263
6264         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6265                 cik_enable_gui_idle_interrupt(rdev, true);
6266
6267                 tmp = cik_halt_rlc(rdev);
6268
6269                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6270                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6271                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6272                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6273                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6274
6275                 cik_update_rlc(rdev, tmp);
6276
6277                 data |= CGCG_EN | CGLS_EN;
6278         } else {
6279                 cik_enable_gui_idle_interrupt(rdev, false);
6280
6281                 RREG32(CB_CGTT_SCLK_CTRL);
6282                 RREG32(CB_CGTT_SCLK_CTRL);
6283                 RREG32(CB_CGTT_SCLK_CTRL);
6284                 RREG32(CB_CGTT_SCLK_CTRL);
6285
6286                 data &= ~(CGCG_EN | CGLS_EN);
6287         }
6288
6289         if (orig != data)
6290                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6291
6292 }
6293
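/**
 * cik_enable_mgcg - enable/disable medium grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MGCG
 *
 * Enable or disable medium grain clock gating for the GFX block,
 * along with the related CP/RLC memory light sleep and CGTS
 * settings, depending on the cg_flags (CIK).
 */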
6294 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6295 {
6296         u32 data, orig, tmp = 0;
6297
6298         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6299                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6300                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6301                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6302                                 data |= CP_MEM_LS_EN;
6303                                 if (orig != data)
6304                                         WREG32(CP_MEM_SLP_CNTL, data);
6305                         }
6306                 }
6307
6308                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6309                 data &= 0xfffffffd;
6310                 if (orig != data)
6311                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6312
6313                 tmp = cik_halt_rlc(rdev);
6314
6315                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6316                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6317                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6318                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6319                 WREG32(RLC_SERDES_WR_CTRL, data);
6320
6321                 cik_update_rlc(rdev, tmp);
6322
6323                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6324                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6325                         data &= ~SM_MODE_MASK;
6326                         data |= SM_MODE(0x2);
6327                         data |= SM_MODE_ENABLE;
6328                         data &= ~CGTS_OVERRIDE;
6329                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6330                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6331                                 data &= ~CGTS_LS_OVERRIDE;
6332                         data &= ~ON_MONITOR_ADD_MASK;
6333                         data |= ON_MONITOR_ADD_EN;
6334                         data |= ON_MONITOR_ADD(0x96);
6335                         if (orig != data)
6336                                 WREG32(CGTS_SM_CTRL_REG, data);
6337                 }
6338         } else {
6339                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6340                 data |= 0x00000002;
6341                 if (orig != data)
6342                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6343
6344                 data = RREG32(RLC_MEM_SLP_CNTL);
6345                 if (data & RLC_MEM_LS_EN) {
6346                         data &= ~RLC_MEM_LS_EN;
6347                         WREG32(RLC_MEM_SLP_CNTL, data);
6348                 }
6349
6350                 data = RREG32(CP_MEM_SLP_CNTL);
6351                 if (data & CP_MEM_LS_EN) {
6352                         data &= ~CP_MEM_LS_EN;
6353                         WREG32(CP_MEM_SLP_CNTL, data);
6354                 }
6355
6356                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6357                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6358                 if (orig != data)
6359                         WREG32(CGTS_SM_CTRL_REG, data);
6360
6361                 tmp = cik_halt_rlc(rdev);
6362
6363                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6364                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6365                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6366                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6367                 WREG32(RLC_SERDES_WR_CTRL, data);
6368
6369                 cik_update_rlc(rdev, tmp);
6370         }
6371 }
6372
6373 static const u32 mc_cg_registers[] =
6374 {
6375         MC_HUB_MISC_HUB_CG,
6376         MC_HUB_MISC_SIP_CG,
6377         MC_HUB_MISC_VM_CG,
6378         MC_XPB_CLK_GAT,
6379         ATC_MISC_CG,
6380         MC_CITF_MISC_WR_CG,
6381         MC_CITF_MISC_RD_CG,
6382         MC_CITF_MISC_VM_CG,
6383         VM_L2_CG,
6384 };
6385
6386 static void cik_enable_mc_ls(struct radeon_device *rdev,
6387                              bool enable)
6388 {
6389         int i;
6390         u32 orig, data;
6391
6392         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6393                 orig = data = RREG32(mc_cg_registers[i]);
6394                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6395                         data |= MC_LS_ENABLE;
6396                 else
6397                         data &= ~MC_LS_ENABLE;
6398                 if (data != orig)
6399                         WREG32(mc_cg_registers[i], data);
6400         }
6401 }
6402
6403 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6404                                bool enable)
6405 {
6406         int i;
6407         u32 orig, data;
6408
6409         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6410                 orig = data = RREG32(mc_cg_registers[i]);
6411                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6412                         data |= MC_CG_ENABLE;
6413                 else
6414                         data &= ~MC_CG_ENABLE;
6415                 if (data != orig)
6416                         WREG32(mc_cg_registers[i], data);
6417         }
6418 }
6419
6420 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6421                                  bool enable)
6422 {
6423         u32 orig, data;
6424
6425         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6426                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6427                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6428         } else {
6429                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6430                 data |= 0xff000000;
6431                 if (data != orig)
6432                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6433
6434                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6435                 data |= 0xff000000;
6436                 if (data != orig)
6437                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6438         }
6439 }
6440
6441 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6442                                  bool enable)
6443 {
6444         u32 orig, data;
6445
6446         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6447                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6448                 data |= 0x100;
6449                 if (orig != data)
6450                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6451
6452                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6453                 data |= 0x100;
6454                 if (orig != data)
6455                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6456         } else {
6457                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6458                 data &= ~0x100;
6459                 if (orig != data)
6460                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6461
6462                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6463                 data &= ~0x100;
6464                 if (orig != data)
6465                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6466         }
6467 }
6468
6469 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6470                                 bool enable)
6471 {
6472         u32 orig, data;
6473
6474         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6475                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6476                 data = 0xfff;
6477                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6478
6479                 orig = data = RREG32(UVD_CGC_CTRL);
6480                 data |= DCM;
6481                 if (orig != data)
6482                         WREG32(UVD_CGC_CTRL, data);
6483         } else {
6484                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6485                 data &= ~0xfff;
6486                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6487
6488                 orig = data = RREG32(UVD_CGC_CTRL);
6489                 data &= ~DCM;
6490                 if (orig != data)
6491                         WREG32(UVD_CGC_CTRL, data);
6492         }
6493 }
6494
6495 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6496                                bool enable)
6497 {
6498         u32 orig, data;
6499
6500         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6501
6502         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6503                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6504                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6505         else
6506                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6507                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6508
6509         if (orig != data)
6510                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6511 }
6512
6513 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6514                                 bool enable)
6515 {
6516         u32 orig, data;
6517
6518         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6519
6520         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6521                 data &= ~CLOCK_GATING_DIS;
6522         else
6523                 data |= CLOCK_GATING_DIS;
6524
6525         if (orig != data)
6526                 WREG32(HDP_HOST_PATH_CNTL, data);
6527 }
6528
6529 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6530                               bool enable)
6531 {
6532         u32 orig, data;
6533
6534         orig = data = RREG32(HDP_MEM_POWER_LS);
6535
6536         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6537                 data |= HDP_LS_ENABLE;
6538         else
6539                 data &= ~HDP_LS_ENABLE;
6540
6541         if (orig != data)
6542                 WREG32(HDP_MEM_POWER_LS, data);
6543 }
6544
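/**
 * cik_update_cg - update clock gating for the requested blocks
 *
 * @rdev: radeon_device pointer
 * @block: RADEON_CG_BLOCK_* mask of blocks to update
 * @enable: enable or disable clock gating
 *
 * Enable or disable clock gating for the GFX, MC, SDMA, BIF, UVD,
 * HDP and VCE blocks selected in @block (CIK).
 */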
6545 void cik_update_cg(struct radeon_device *rdev,
6546                    u32 block, bool enable)
6547 {
6548
6549         if (block & RADEON_CG_BLOCK_GFX) {
6550                 cik_enable_gui_idle_interrupt(rdev, false);
6551                 /* order matters! */
6552                 if (enable) {
6553                         cik_enable_mgcg(rdev, true);
6554                         cik_enable_cgcg(rdev, true);
6555                 } else {
6556                         cik_enable_cgcg(rdev, false);
6557                         cik_enable_mgcg(rdev, false);
6558                 }
6559                 cik_enable_gui_idle_interrupt(rdev, true);
6560         }
6561
6562         if (block & RADEON_CG_BLOCK_MC) {
6563                 if (!(rdev->flags & RADEON_IS_IGP)) {
6564                         cik_enable_mc_mgcg(rdev, enable);
6565                         cik_enable_mc_ls(rdev, enable);
6566                 }
6567         }
6568
6569         if (block & RADEON_CG_BLOCK_SDMA) {
6570                 cik_enable_sdma_mgcg(rdev, enable);
6571                 cik_enable_sdma_mgls(rdev, enable);
6572         }
6573
6574         if (block & RADEON_CG_BLOCK_BIF) {
6575                 cik_enable_bif_mgls(rdev, enable);
6576         }
6577
6578         if (block & RADEON_CG_BLOCK_UVD) {
6579                 if (rdev->has_uvd)
6580                         cik_enable_uvd_mgcg(rdev, enable);
6581         }
6582
6583         if (block & RADEON_CG_BLOCK_HDP) {
6584                 cik_enable_hdp_mgcg(rdev, enable);
6585                 cik_enable_hdp_ls(rdev, enable);
6586         }
6587
6588         if (block & RADEON_CG_BLOCK_VCE) {
6589                 vce_v2_0_enable_mgcg(rdev, enable);
6590         }
6591 }
6592
6593 static void cik_init_cg(struct radeon_device *rdev)
6594 {
6595
6596         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6597
6598         if (rdev->has_uvd)
6599                 si_init_uvd_internal_cg(rdev);
6600
6601         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6602                              RADEON_CG_BLOCK_SDMA |
6603                              RADEON_CG_BLOCK_BIF |
6604                              RADEON_CG_BLOCK_UVD |
6605                              RADEON_CG_BLOCK_HDP), true);
6606 }
6607
6608 static void cik_fini_cg(struct radeon_device *rdev)
6609 {
6610         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6611                              RADEON_CG_BLOCK_SDMA |
6612                              RADEON_CG_BLOCK_BIF |
6613                              RADEON_CG_BLOCK_UVD |
6614                              RADEON_CG_BLOCK_HDP), false);
6615
6616         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6617 }
6618
6619 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6620                                           bool enable)
6621 {
6622         u32 data, orig;
6623
6624         orig = data = RREG32(RLC_PG_CNTL);
6625         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6626                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6627         else
6628                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6629         if (orig != data)
6630                 WREG32(RLC_PG_CNTL, data);
6631 }
6632
6633 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6634                                           bool enable)
6635 {
6636         u32 data, orig;
6637
6638         orig = data = RREG32(RLC_PG_CNTL);
6639         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6640                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6641         else
6642                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6643         if (orig != data)
6644                 WREG32(RLC_PG_CNTL, data);
6645 }
6646
6647 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6648 {
6649         u32 data, orig;
6650
6651         orig = data = RREG32(RLC_PG_CNTL);
6652         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6653                 data &= ~DISABLE_CP_PG;
6654         else
6655                 data |= DISABLE_CP_PG;
6656         if (orig != data)
6657                 WREG32(RLC_PG_CNTL, data);
6658 }
6659
6660 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6661 {
6662         u32 data, orig;
6663
6664         orig = data = RREG32(RLC_PG_CNTL);
6665         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6666                 data &= ~DISABLE_GDS_PG;
6667         else
6668                 data |= DISABLE_GDS_PG;
6669         if (orig != data)
6670                 WREG32(RLC_PG_CNTL, data);
6671 }
6672
6673 #define CP_ME_TABLE_SIZE    96
6674 #define CP_ME_TABLE_OFFSET  2048
6675 #define CP_MEC_TABLE_OFFSET 4096
6676
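/**
 * cik_init_cp_pg_table - populate the CP jump table for powergating
 *
 * @rdev: radeon_device pointer
 *
 * Copy the CE/PFP/ME/MEC (and MEC2 on KAVERI) jump tables out of the
 * CP firmware images into the RLC cp_table buffer so the RLC can
 * restore CP state after powergating (CIK).
 */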
6677 void cik_init_cp_pg_table(struct radeon_device *rdev)
6678 {
6679         volatile u32 *dst_ptr;
6680         int me, i, max_me = 4;
6681         u32 bo_offset = 0;
6682         u32 table_offset, table_size;
6683
6684         if (rdev->family == CHIP_KAVERI)
6685                 max_me = 5;
6686
6687         if (rdev->rlc.cp_table_ptr == NULL)
6688                 return;
6689
6690         /* write the cp table buffer */
6691         dst_ptr = rdev->rlc.cp_table_ptr;
6692         for (me = 0; me < max_me; me++) {
6693                 if (rdev->new_fw) {
6694                         const __le32 *fw_data;
6695                         const struct gfx_firmware_header_v1_0 *hdr;
6696
6697                         if (me == 0) {
6698                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6699                                 fw_data = (const __le32 *)
6700                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6701                                 table_offset = le32_to_cpu(hdr->jt_offset);
6702                                 table_size = le32_to_cpu(hdr->jt_size);
6703                         } else if (me == 1) {
6704                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6705                                 fw_data = (const __le32 *)
6706                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6707                                 table_offset = le32_to_cpu(hdr->jt_offset);
6708                                 table_size = le32_to_cpu(hdr->jt_size);
6709                         } else if (me == 2) {
6710                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6711                                 fw_data = (const __le32 *)
6712                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6713                                 table_offset = le32_to_cpu(hdr->jt_offset);
6714                                 table_size = le32_to_cpu(hdr->jt_size);
6715                         } else if (me == 3) {
6716                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6717                                 fw_data = (const __le32 *)
6718                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6719                                 table_offset = le32_to_cpu(hdr->jt_offset);
6720                                 table_size = le32_to_cpu(hdr->jt_size);
6721                         } else {
6722                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6723                                 fw_data = (const __le32 *)
6724                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6725                                 table_offset = le32_to_cpu(hdr->jt_offset);
6726                                 table_size = le32_to_cpu(hdr->jt_size);
6727                         }
6728
6729                         for (i = 0; i < table_size; i ++) {
6730                                 dst_ptr[bo_offset + i] =
6731                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6732                         }
6733                         bo_offset += table_size;
6734                 } else {
6735                         const __be32 *fw_data;
6736                         table_size = CP_ME_TABLE_SIZE;
6737
6738                         if (me == 0) {
6739                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6740                                 table_offset = CP_ME_TABLE_OFFSET;
6741                         } else if (me == 1) {
6742                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6743                                 table_offset = CP_ME_TABLE_OFFSET;
6744                         } else if (me == 2) {
6745                                 fw_data = (const __be32 *)rdev->me_fw->data;
6746                                 table_offset = CP_ME_TABLE_OFFSET;
6747                         } else {
6748                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6749                                 table_offset = CP_MEC_TABLE_OFFSET;
6750                         }
6751
6752                         for (i = 0; i < table_size; i ++) {
6753                                 dst_ptr[bo_offset + i] =
6754                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6755                         }
6756                         bo_offset += table_size;
6757                 }
6758         }
6759 }
6760
6761 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6762                                 bool enable)
6763 {
6764         u32 data, orig;
6765
6766         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6767                 orig = data = RREG32(RLC_PG_CNTL);
6768                 data |= GFX_PG_ENABLE;
6769                 if (orig != data)
6770                         WREG32(RLC_PG_CNTL, data);
6771
6772                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6773                 data |= AUTO_PG_EN;
6774                 if (orig != data)
6775                         WREG32(RLC_AUTO_PG_CTRL, data);
6776         } else {
6777                 orig = data = RREG32(RLC_PG_CNTL);
6778                 data &= ~GFX_PG_ENABLE;
6779                 if (orig != data)
6780                         WREG32(RLC_PG_CNTL, data);
6781
6782                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6783                 data &= ~AUTO_PG_EN;
6784                 if (orig != data)
6785                         WREG32(RLC_AUTO_PG_CTRL, data);
6786
6787                 data = RREG32(DB_RENDER_CONTROL);
6788         }
6789 }
6790
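/**
 * cik_get_cu_active_bitmap - get a bitmap of the active CUs
 *
 * @rdev: radeon_device pointer
 * @se: shader engine to query
 * @sh: shader array within the shader engine
 *
 * Read the shader array config registers for the selected SE/SH and
 * return a bitmask of the CUs that are not disabled, limited to
 * max_cu_per_sh bits (CIK).
 */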
6791 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6792 {
6793         u32 mask = 0, tmp, tmp1;
6794         int i;
6795
6796         cik_select_se_sh(rdev, se, sh);
6797         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6798         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6799         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6800
6801         tmp &= 0xffff0000;
6802
6803         tmp |= tmp1;
6804         tmp >>= 16;
6805
6806         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6807                 mask <<= 1;
6808                 mask |= 1;
6809         }
6810
6811         return (~tmp) & mask;
6812 }
6813
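/**
 * cik_init_ao_cu_mask - set up the always-on CU mask
 *
 * @rdev: radeon_device pointer
 *
 * Mark up to two active CUs per shader array as always-on in
 * RLC_PG_AO_CU_MASK and program the total number of active CUs
 * into RLC_MAX_PG_CU (CIK).
 */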
6814 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6815 {
6816         u32 i, j, k, active_cu_number = 0;
6817         u32 mask, counter, cu_bitmap;
6818         u32 tmp = 0;
6819
6820         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6821                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6822                         mask = 1;
6823                         cu_bitmap = 0;
6824                         counter = 0;
6825                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6826                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6827                                         if (counter < 2)
6828                                                 cu_bitmap |= mask;
6829                                         counter ++;
6830                                 }
6831                                 mask <<= 1;
6832                         }
6833
6834                         active_cu_number += counter;
6835                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6836                 }
6837         }
6838
6839         WREG32(RLC_PG_AO_CU_MASK, tmp);
6840
6841         tmp = RREG32(RLC_MAX_PG_CU);
6842         tmp &= ~MAX_PU_CU_MASK;
6843         tmp |= MAX_PU_CU(active_cu_number);
6844         WREG32(RLC_MAX_PG_CU, tmp);
6845 }
6846
6847 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6848                                        bool enable)
6849 {
6850         u32 data, orig;
6851
6852         orig = data = RREG32(RLC_PG_CNTL);
6853         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6854                 data |= STATIC_PER_CU_PG_ENABLE;
6855         else
6856                 data &= ~STATIC_PER_CU_PG_ENABLE;
6857         if (orig != data)
6858                 WREG32(RLC_PG_CNTL, data);
6859 }
6860
6861 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6862                                         bool enable)
6863 {
6864         u32 data, orig;
6865
6866         orig = data = RREG32(RLC_PG_CNTL);
6867         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6868                 data |= DYN_PER_CU_PG_ENABLE;
6869         else
6870                 data &= ~DYN_PER_CU_PG_ENABLE;
6871         if (orig != data)
6872                 WREG32(RLC_PG_CNTL, data);
6873 }
6874
6875 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6876 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6877
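/**
 * cik_init_gfx_cgpg - set up the RLC for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Program the clear state descriptor, register save/restore list,
 * save/restore and CP table addresses, and the powergating delay
 * and idle poll parameters into the RLC (CIK).
 */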
6878 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6879 {
6880         u32 data, orig;
6881         u32 i;
6882
6883         if (rdev->rlc.cs_data) {
6884                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6885                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6886                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6887                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6888         } else {
6889                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6890                 for (i = 0; i < 3; i++)
6891                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6892         }
6893         if (rdev->rlc.reg_list) {
6894                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6895                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6896                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6897         }
6898
6899         orig = data = RREG32(RLC_PG_CNTL);
6900         data |= GFX_PG_SRC;
6901         if (orig != data)
6902                 WREG32(RLC_PG_CNTL, data);
6903
6904         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6905         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6906
6907         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6908         data &= ~IDLE_POLL_COUNT_MASK;
6909         data |= IDLE_POLL_COUNT(0x60);
6910         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6911
6912         data = 0x10101010;
6913         WREG32(RLC_PG_DELAY, data);
6914
6915         data = RREG32(RLC_PG_DELAY_2);
6916         data &= ~0xff;
6917         data |= 0x3;
6918         WREG32(RLC_PG_DELAY_2, data);
6919
6920         data = RREG32(RLC_AUTO_PG_CTRL);
6921         data &= ~GRBM_REG_SGIT_MASK;
6922         data |= GRBM_REG_SGIT(0x700);
6923         WREG32(RLC_AUTO_PG_CTRL, data);
6924
6925 }
6926
6927 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6928 {
6929         cik_enable_gfx_cgpg(rdev, enable);
6930         cik_enable_gfx_static_mgpg(rdev, enable);
6931         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6932 }
6933
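/**
 * cik_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Walk the clear state sections and return the number of dwords
 * needed for the clear state buffer, including the preamble,
 * context control, raster config and clear state packets (CIK).
 */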
6934 u32 cik_get_csb_size(struct radeon_device *rdev)
6935 {
6936         u32 count = 0;
6937         const struct cs_section_def *sect = NULL;
6938         const struct cs_extent_def *ext = NULL;
6939
6940         if (rdev->rlc.cs_data == NULL)
6941                 return 0;
6942
6943         /* begin clear state */
6944         count += 2;
6945         /* context control state */
6946         count += 3;
6947
6948         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6949                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6950                         if (sect->id == SECT_CONTEXT)
6951                                 count += 2 + ext->reg_count;
6952                         else
6953                                 return 0;
6954                 }
6955         }
6956         /* pa_sc_raster_config/pa_sc_raster_config1 */
6957         count += 4;
6958         /* end clear state */
6959         count += 2;
6960         /* clear state */
6961         count += 2;
6962
6963         return count;
6964 }
6965
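/**
 * cik_get_csb_buffer - fill in the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: pointer to the clear state buffer
 *
 * Write the PM4 packets that make up the clear state buffer,
 * including the per-family pa_sc_raster_config values (CIK).
 */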
6966 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6967 {
6968         u32 count = 0, i;
6969         const struct cs_section_def *sect = NULL;
6970         const struct cs_extent_def *ext = NULL;
6971
6972         if (rdev->rlc.cs_data == NULL)
6973                 return;
6974         if (buffer == NULL)
6975                 return;
6976
6977         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6978         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6979
6980         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6981         buffer[count++] = cpu_to_le32(0x80000000);
6982         buffer[count++] = cpu_to_le32(0x80000000);
6983
6984         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6985                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6986                         if (sect->id == SECT_CONTEXT) {
6987                                 buffer[count++] =
6988                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6989                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6990                                 for (i = 0; i < ext->reg_count; i++)
6991                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6992                         } else {
6993                                 return;
6994                         }
6995                 }
6996         }
6997
6998         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6999         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7000         switch (rdev->family) {
7001         case CHIP_BONAIRE:
7002                 buffer[count++] = cpu_to_le32(0x16000012);
7003                 buffer[count++] = cpu_to_le32(0x00000000);
7004                 break;
7005         case CHIP_KAVERI:
7006                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7007                 buffer[count++] = cpu_to_le32(0x00000000);
7008                 break;
7009         case CHIP_KABINI:
7010         case CHIP_MULLINS:
7011                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7012                 buffer[count++] = cpu_to_le32(0x00000000);
7013                 break;
7014         case CHIP_HAWAII:
7015                 buffer[count++] = cpu_to_le32(0x3a00161a);
7016                 buffer[count++] = cpu_to_le32(0x0000002e);
7017                 break;
7018         default:
7019                 buffer[count++] = cpu_to_le32(0x00000000);
7020                 buffer[count++] = cpu_to_le32(0x00000000);
7021                 break;
7022         }
7023
7024         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7025         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7026
7027         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7028         buffer[count++] = cpu_to_le32(0);
7029 }
7030
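/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * Enable the powergating features selected by pg_flags: sclk
 * slowdown on power up/down, CP and GDS powergating, the always-on
 * CU mask and gfx powergating (CIK).
 */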
7031 static void cik_init_pg(struct radeon_device *rdev)
7032 {
7033         if (rdev->pg_flags) {
7034                 cik_enable_sck_slowdown_on_pu(rdev, true);
7035                 cik_enable_sck_slowdown_on_pd(rdev, true);
7036                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7037                         cik_init_gfx_cgpg(rdev);
7038                         cik_enable_cp_pg(rdev, true);
7039                         cik_enable_gds_pg(rdev, true);
7040                 }
7041                 cik_init_ao_cu_mask(rdev);
7042                 cik_update_gfx_pg(rdev, true);
7043         }
7044 }
7045
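/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disable gfx powergating and, if gfx powergating was enabled,
 * CP and GDS powergating (CIK).
 */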
7046 static void cik_fini_pg(struct radeon_device *rdev)
7047 {
7048         if (rdev->pg_flags) {
7049                 cik_update_gfx_pg(rdev, false);
7050                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7051                         cik_enable_cp_pg(rdev, false);
7052                         cik_enable_gds_pg(rdev, false);
7053                 }
7054         }
7055 }
7056
7057 /*
7058  * Interrupts
7059  * Starting with r6xx, interrupts are handled via a ring buffer.
7060  * Ring buffers are areas of GPU accessible memory that the GPU
7061  * writes interrupt vectors into and the host reads vectors out of.
7062  * There is a rptr (read pointer) that determines where the
7063  * host is currently reading, and a wptr (write pointer)
7064  * which determines where the GPU has written.  When the
7065  * pointers are equal, the ring is idle.  When the GPU
7066  * writes vectors to the ring buffer, it increments the
7067  * wptr.  When there is an interrupt, the host then starts
7068  * fetching vectors and processing them until the pointers are
7069  * equal again at which point it updates the rptr.
7070  */
7071
7072 /**
7073  * cik_enable_interrupts - Enable the interrupt ring buffer
7074  *
7075  * @rdev: radeon_device pointer
7076  *
7077  * Enable the interrupt ring buffer (CIK).
7078  */
7079 static void cik_enable_interrupts(struct radeon_device *rdev)
7080 {
7081         u32 ih_cntl = RREG32(IH_CNTL);
7082         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7083
7084         ih_cntl |= ENABLE_INTR;
7085         ih_rb_cntl |= IH_RB_ENABLE;
7086         WREG32(IH_CNTL, ih_cntl);
7087         WREG32(IH_RB_CNTL, ih_rb_cntl);
7088         rdev->ih.enabled = true;
7089 }
7090
7091 /**
7092  * cik_disable_interrupts - Disable the interrupt ring buffer
7093  *
7094  * @rdev: radeon_device pointer
7095  *
7096  * Disable the interrupt ring buffer (CIK).
7097  */
7098 static void cik_disable_interrupts(struct radeon_device *rdev)
7099 {
7100         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7101         u32 ih_cntl = RREG32(IH_CNTL);
7102
7103         ih_rb_cntl &= ~IH_RB_ENABLE;
7104         ih_cntl &= ~ENABLE_INTR;
7105         WREG32(IH_RB_CNTL, ih_rb_cntl);
7106         WREG32(IH_CNTL, ih_cntl);
7107         /* set rptr, wptr to 0 */
7108         WREG32(IH_RB_RPTR, 0);
7109         WREG32(IH_RB_WPTR, 0);
7110         rdev->ih.enabled = false;
7111         rdev->ih.rptr = 0;
7112 }
7113
7114 /**
7115  * cik_disable_interrupt_state - Disable all interrupt sources
7116  *
7117  * @rdev: radeon_device pointer
7118  *
7119  * Clear all interrupt enable bits used by the driver (CIK).
7120  */
7121 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7122 {
7123         u32 tmp;
7124
7125         /* gfx ring */
7126         tmp = RREG32(CP_INT_CNTL_RING0) &
7127                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7128         WREG32(CP_INT_CNTL_RING0, tmp);
7129         /* sdma */
7130         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7131         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7132         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7133         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7134         /* compute queues */
7135         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7136         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7137         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7138         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7139         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7140         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7141         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7142         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7143         /* grbm */
7144         WREG32(GRBM_INT_CNTL, 0);
7145         /* vline/vblank, etc. */
7146         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7147         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7148         if (rdev->num_crtc >= 4) {
7149                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7150                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7151         }
7152         if (rdev->num_crtc >= 6) {
7153                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7154                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7155         }
7156         /* pflip */
7157         if (rdev->num_crtc >= 2) {
7158                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7159                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7160         }
7161         if (rdev->num_crtc >= 4) {
7162                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7163                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7164         }
7165         if (rdev->num_crtc >= 6) {
7166                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7167                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7168         }
7169
7170         /* dac hotplug */
7171         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7172
7173         /* digital hotplug */
7174         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7175         WREG32(DC_HPD1_INT_CONTROL, tmp);
7176         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7177         WREG32(DC_HPD2_INT_CONTROL, tmp);
7178         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7179         WREG32(DC_HPD3_INT_CONTROL, tmp);
7180         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7181         WREG32(DC_HPD4_INT_CONTROL, tmp);
7182         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7183         WREG32(DC_HPD5_INT_CONTROL, tmp);
7184         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7185         WREG32(DC_HPD6_INT_CONTROL, tmp);
7186
7187 }
7188
7189 /**
7190  * cik_irq_init - init and enable the interrupt ring
7191  *
7192  * @rdev: radeon_device pointer
7193  *
7194  * Allocate a ring buffer for the interrupt controller,
7195  * disable interrupts, initialize the RLC, set up the IH
7196  * ring buffer and enable it (CIK).
7197  * Called at device load and resume.
7198  * Returns 0 for success, errors for failure.
7199  */
7200 static int cik_irq_init(struct radeon_device *rdev)
7201 {
7202         int ret = 0;
7203         int rb_bufsz;
7204         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7205
7206         /* allocate ring */
7207         ret = r600_ih_ring_alloc(rdev);
7208         if (ret)
7209                 return ret;
7210
7211         /* disable irqs */
7212         cik_disable_interrupts(rdev);
7213
7214         /* init rlc */
7215         ret = cik_rlc_resume(rdev);
7216         if (ret) {
7217                 r600_ih_ring_fini(rdev);
7218                 return ret;
7219         }
7220
7221         /* setup interrupt control */
7222         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7223         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7224         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7225         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7226          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7227          */
7228         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7229         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7230         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7231         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7232
7233         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7234         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7235
7236         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7237                       IH_WPTR_OVERFLOW_CLEAR |
7238                       (rb_bufsz << 1));
7239
7240         if (rdev->wb.enabled)
7241                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7242
7243         /* set the writeback address whether it's enabled or not */
7244         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7245         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7246
7247         WREG32(IH_RB_CNTL, ih_rb_cntl);
7248
7249         /* set rptr, wptr to 0 */
7250         WREG32(IH_RB_RPTR, 0);
7251         WREG32(IH_RB_WPTR, 0);
7252
7253         /* Default settings for IH_CNTL (disabled at first) */
7254         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7255         /* RPTR_REARM only works if msi's are enabled */
7256         if (rdev->msi_enabled)
7257                 ih_cntl |= RPTR_REARM;
7258         WREG32(IH_CNTL, ih_cntl);
7259
7260         /* force the active interrupt state to all disabled */
7261         cik_disable_interrupt_state(rdev);
7262
7263         pci_set_master(rdev->pdev);
7264
7265         /* enable irqs */
7266         cik_enable_interrupts(rdev);
7267
7268         return ret;
7269 }
7270
7271 /**
7272  * cik_irq_set - enable/disable interrupt sources
7273  *
7274  * @rdev: radeon_device pointer
7275  *
7276  * Enable interrupt sources on the GPU (vblanks, hpd,
7277  * etc.) (CIK).
7278  * Returns 0 for success, errors for failure.
7279  */
7280 int cik_irq_set(struct radeon_device *rdev)
7281 {
7282         u32 cp_int_cntl;
7283         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7284         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7285         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7286         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7287         u32 grbm_int_cntl = 0;
7288         u32 dma_cntl, dma_cntl1;
7289         u32 thermal_int;
7290
7291         if (!rdev->irq.installed) {
7292                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7293                 return -EINVAL;
7294         }
7295         /* don't enable anything if the ih is disabled */
7296         if (!rdev->ih.enabled) {
7297                 cik_disable_interrupts(rdev);
7298                 /* force the active interrupt state to all disabled */
7299                 cik_disable_interrupt_state(rdev);
7300                 return 0;
7301         }
7302
7303         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7304                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7305         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7306
7307         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7308         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7309         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7310         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7311         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7312         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7313
7314         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7315         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7316
7317         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7318         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7319         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7320         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7321         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7322         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7323         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7324         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7325
7326         if (rdev->flags & RADEON_IS_IGP)
7327                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7328                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
7329         else
7330                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7331                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7332
7333         /* enable CP interrupts on all rings */
7334         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7335                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7336                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7337         }
7338         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7339                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7340                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
7341                 if (ring->me == 1) {
7342                         switch (ring->pipe) {
7343                         case 0:
7344                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7345                                 break;
7346                         case 1:
7347                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7348                                 break;
7349                         case 2:
7350                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7351                                 break;
7352                         case 3:
7353                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7354                                 break;
7355                         default:
7356                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7357                                 break;
7358                         }
7359                 } else if (ring->me == 2) {
7360                         switch (ring->pipe) {
7361                         case 0:
7362                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7363                                 break;
7364                         case 1:
7365                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7366                                 break;
7367                         case 2:
7368                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7369                                 break;
7370                         case 3:
7371                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7372                                 break;
7373                         default:
7374                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7375                                 break;
7376                         }
7377                 } else {
7378                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7379                 }
7380         }
7381         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7382                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7383                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
7384                 if (ring->me == 1) {
7385                         switch (ring->pipe) {
7386                         case 0:
7387                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7388                                 break;
7389                         case 1:
7390                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7391                                 break;
7392                         case 2:
7393                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7394                                 break;
7395                         case 3:
7396                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7397                                 break;
7398                         default:
7399                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7400                                 break;
7401                         }
7402                 } else if (ring->me == 2) {
7403                         switch (ring->pipe) {
7404                         case 0:
7405                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7406                                 break;
7407                         case 1:
7408                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7409                                 break;
7410                         case 2:
7411                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7412                                 break;
7413                         case 3:
7414                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7415                                 break;
7416                         default:
7417                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7418                                 break;
7419                         }
7420                 } else {
7421                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7422                 }
7423         }
7424
7425         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7426                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7427                 dma_cntl |= TRAP_ENABLE;
7428         }
7429
7430         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7431                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7432                 dma_cntl1 |= TRAP_ENABLE;
7433         }
7434
7435         if (rdev->irq.crtc_vblank_int[0] ||
7436             atomic_read(&rdev->irq.pflip[0])) {
7437                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7438                 crtc1 |= VBLANK_INTERRUPT_MASK;
7439         }
7440         if (rdev->irq.crtc_vblank_int[1] ||
7441             atomic_read(&rdev->irq.pflip[1])) {
7442                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7443                 crtc2 |= VBLANK_INTERRUPT_MASK;
7444         }
7445         if (rdev->irq.crtc_vblank_int[2] ||
7446             atomic_read(&rdev->irq.pflip[2])) {
7447                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7448                 crtc3 |= VBLANK_INTERRUPT_MASK;
7449         }
7450         if (rdev->irq.crtc_vblank_int[3] ||
7451             atomic_read(&rdev->irq.pflip[3])) {
7452                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7453                 crtc4 |= VBLANK_INTERRUPT_MASK;
7454         }
7455         if (rdev->irq.crtc_vblank_int[4] ||
7456             atomic_read(&rdev->irq.pflip[4])) {
7457                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7458                 crtc5 |= VBLANK_INTERRUPT_MASK;
7459         }
7460         if (rdev->irq.crtc_vblank_int[5] ||
7461             atomic_read(&rdev->irq.pflip[5])) {
7462                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7463                 crtc6 |= VBLANK_INTERRUPT_MASK;
7464         }
7465         if (rdev->irq.hpd[0]) {
7466                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7467                 hpd1 |= DC_HPDx_INT_EN;
7468         }
7469         if (rdev->irq.hpd[1]) {
7470                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7471                 hpd2 |= DC_HPDx_INT_EN;
7472         }
7473         if (rdev->irq.hpd[2]) {
7474                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7475                 hpd3 |= DC_HPDx_INT_EN;
7476         }
7477         if (rdev->irq.hpd[3]) {
7478                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7479                 hpd4 |= DC_HPDx_INT_EN;
7480         }
7481         if (rdev->irq.hpd[4]) {
7482                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7483                 hpd5 |= DC_HPDx_INT_EN;
7484         }
7485         if (rdev->irq.hpd[5]) {
7486                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7487                 hpd6 |= DC_HPDx_INT_EN;
7488         }
7489
7490         if (rdev->irq.dpm_thermal) {
7491                 DRM_DEBUG("dpm thermal\n");
7492                 if (rdev->flags & RADEON_IS_IGP)
7493                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7494                 else
7495                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7496         }
7497
7498         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7499
7500         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7501         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7502
7503         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7504         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7505         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7506         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7507         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7508         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7509         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7510         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7511
7512         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7513
7514         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7515         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7516         if (rdev->num_crtc >= 4) {
7517                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7518                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7519         }
7520         if (rdev->num_crtc >= 6) {
7521                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7522                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7523         }
7524
7525         if (rdev->num_crtc >= 2) {
7526                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7527                        GRPH_PFLIP_INT_MASK);
7528                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7529                        GRPH_PFLIP_INT_MASK);
7530         }
7531         if (rdev->num_crtc >= 4) {
7532                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7533                        GRPH_PFLIP_INT_MASK);
7534                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7535                        GRPH_PFLIP_INT_MASK);
7536         }
7537         if (rdev->num_crtc >= 6) {
7538                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7539                        GRPH_PFLIP_INT_MASK);
7540                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7541                        GRPH_PFLIP_INT_MASK);
7542         }
7543
7544         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7545         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7546         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7547         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7548         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7549         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7550
7551         if (rdev->flags & RADEON_IS_IGP)
7552                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7553         else
7554                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7555
7556         return 0;
7557 }
7558
7559 /**
7560  * cik_irq_ack - ack interrupt sources
7561  *
7562  * @rdev: radeon_device pointer
7563  *
7564  * Ack interrupt sources on the GPU (vblanks, hpd,
7565  * etc.) (CIK).  Certain interrupt sources are sw
7566  * generated and do not require an explicit ack.
7567  */
7568 static inline void cik_irq_ack(struct radeon_device *rdev)
7569 {
7570         u32 tmp;
7571
7572         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7573         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7574         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7575         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7576         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7577         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7578         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7579
7580         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7581                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7582         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7583                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7584         if (rdev->num_crtc >= 4) {
7585                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7586                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7587                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7588                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7589         }
7590         if (rdev->num_crtc >= 6) {
7591                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7592                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7593                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7594                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7595         }
7596
7597         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7598                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7599                        GRPH_PFLIP_INT_CLEAR);
7600         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7601                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7602                        GRPH_PFLIP_INT_CLEAR);
7603         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7604                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7605         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7606                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7607         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7608                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7609         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7610                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7611
7612         if (rdev->num_crtc >= 4) {
7613                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7614                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7615                                GRPH_PFLIP_INT_CLEAR);
7616                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7617                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7618                                GRPH_PFLIP_INT_CLEAR);
7619                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7620                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7621                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7622                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7623                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7624                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7625                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7626                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7627         }
7628
7629         if (rdev->num_crtc >= 6) {
7630                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7631                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7632                                GRPH_PFLIP_INT_CLEAR);
7633                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7634                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7635                                GRPH_PFLIP_INT_CLEAR);
7636                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7637                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7638                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7639                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7640                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7641                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7642                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7643                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7644         }
7645
7646         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7647                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7648                 tmp |= DC_HPDx_INT_ACK;
7649                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7650         }
7651         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7652                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7653                 tmp |= DC_HPDx_INT_ACK;
7654                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7655         }
7656         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7657                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7658                 tmp |= DC_HPDx_INT_ACK;
7659                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7660         }
7661         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7662                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7663                 tmp |= DC_HPDx_INT_ACK;
7664                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7665         }
7666         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7667                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7668                 tmp |= DC_HPDx_INT_ACK;
7669                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7670         }
7671         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7672                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7673                 tmp |= DC_HPDx_INT_ACK;
7674                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7675         }
7676 }
7677
7678 /**
7679  * cik_irq_disable - disable interrupts
7680  *
7681  * @rdev: radeon_device pointer
7682  *
7683  * Disable interrupts on the hw (CIK).
7684  */
7685 static void cik_irq_disable(struct radeon_device *rdev)
7686 {
7687         cik_disable_interrupts(rdev);
7688         /* Wait and acknowledge irq */
7689         mdelay(1);
7690         cik_irq_ack(rdev);
7691         cik_disable_interrupt_state(rdev);
7692 }
7693
7694 /**
7695  * cik_irq_suspend - disable interrupts for suspend
7696  *
7697  * @rdev: radeon_device pointer
7698  *
7699  * Disable interrupts and stop the RLC (CIK).
7700  * Used for suspend.
7701  */
7702 static void cik_irq_suspend(struct radeon_device *rdev)
7703 {
7704         cik_irq_disable(rdev);
7705         cik_rlc_stop(rdev);
7706 }
7707
7708 /**
7709  * cik_irq_fini - tear down interrupt support
7710  *
7711  * @rdev: radeon_device pointer
7712  *
7713  * Disable interrupts on the hw and free the IH ring
7714  * buffer (CIK).
7715  * Used for driver unload.
7716  */
7717 static void cik_irq_fini(struct radeon_device *rdev)
7718 {
7719         cik_irq_suspend(rdev);
7720         r600_ih_ring_fini(rdev);
7721 }
7722
7723 /**
7724  * cik_get_ih_wptr - get the IH ring buffer wptr
7725  *
7726  * @rdev: radeon_device pointer
7727  *
7728  * Get the IH ring buffer wptr from either the register
7729  * or the writeback memory buffer (CIK).  Also check for
7730  * ring buffer overflow and deal with it.
7731  * Used by cik_irq_process().
7732  * Returns the value of the wptr.
7733  */
7734 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7735 {
7736         u32 wptr, tmp;
7737
7738         if (rdev->wb.enabled)
7739                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7740         else
7741                 wptr = RREG32(IH_RB_WPTR);
7742
7743         if (wptr & RB_OVERFLOW) {
7744                 /* When a ring buffer overflow happens, start parsing interrupts
7745                  * from the last vector that was not overwritten (wptr + 16).
7746                  * Hopefully this should allow us to catch up.
7747                  */
7748                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7749                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7750                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7751                 tmp = RREG32(IH_RB_CNTL);
7752                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7753                 WREG32(IH_RB_CNTL, tmp);
7754                 wptr &= ~RB_OVERFLOW;
7755         }
7756         return (wptr & rdev->ih.ptr_mask);
7757 }
7758
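/* Sizing note (illustrative sketch, not driver code): cik_init() below sets
 * up a 64KB IH ring via r600_ih_ring_init(rdev, 64 * 1024).  Assuming
 * r600_ih_ring_init() derives ptr_mask as ring_size - 1, ptr_mask is 0xffff
 * here, each IV entry occupies 16 bytes, and both rptr and wptr wrap by
 * masking with rdev->ih.ptr_mask, as done above and in cik_irq_process().
 */
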
7759 /*        CIK IV Ring
7760  * Each IV ring entry is 128 bits:
7761  * [7:0]    - interrupt source id
7762  * [31:8]   - reserved
7763  * [59:32]  - interrupt source data
7764  * [63:60]  - reserved
7765  * [71:64]  - RINGID
7766  *            CP:
7767  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7768  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7769  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7770  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7771  *            PIPE_ID - ME0 0=3D
7772  *                    - ME1&2 compute dispatcher (4 pipes each)
7773  *            SDMA:
7774  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7775  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7776  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7777  * [79:72]  - VMID
7778  * [95:80]  - PASID
7779  * [127:96] - reserved
7780  */
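/* Illustrative decode sketch (not used by the driver): one way to unpack the
 * RINGID byte of a CP IV entry into the ME/PIPE/QUEUE fields described
 * above, mirroring the mask/shift decode done in cik_irq_process() below.
 * The helper name is hypothetical.
 */
static inline void cik_decode_cp_ring_id(u32 ring_id, u8 *me_id,
					 u8 *pipe_id, u8 *queue_id)
{
	*me_id = (ring_id & 0x60) >> 5;    /* ME_ID [1:0]    */
	*pipe_id = (ring_id & 0x18) >> 3;  /* PIPE_ID [1:0]  */
	*queue_id = (ring_id & 0x7) >> 0;  /* QUEUE_ID [2:0] */
}
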
7781 /**
7782  * cik_irq_process - interrupt handler
7783  *
7784  * @rdev: radeon_device pointer
7785  *
7786  * Interrupt handler (CIK).  Walk the IH ring,
7787  * ack interrupts and schedule work to handle
7788  * interrupt events.
7789  * Returns irq process return code.
7790  */
7791 int cik_irq_process(struct radeon_device *rdev)
7792 {
7793         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7794         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7795         u32 wptr;
7796         u32 rptr;
7797         u32 src_id, src_data, ring_id;
7798         u8 me_id, pipe_id, queue_id;
7799         u32 ring_index;
7800         bool queue_hotplug = false;
7801         bool queue_reset = false;
7802         u32 addr, status, mc_client;
7803         bool queue_thermal = false;
7804
7805         if (!rdev->ih.enabled || rdev->shutdown)
7806                 return IRQ_NONE;
7807
7808         wptr = cik_get_ih_wptr(rdev);
7809
7810 restart_ih:
7811         /* is somebody else already processing irqs? */
7812         if (atomic_xchg(&rdev->ih.lock, 1))
7813                 return IRQ_NONE;
7814
7815         rptr = rdev->ih.rptr;
7816         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7817
7818         /* Order reading of wptr vs. reading of IH ring data */
7819         rmb();
7820
7821         /* display interrupts */
7822         cik_irq_ack(rdev);
7823
7824         while (rptr != wptr) {
7825                 /* wptr/rptr are in bytes! */
7826                 ring_index = rptr / 4;
7827                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7828                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7829                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7830
7831                 switch (src_id) {
7832                 case 1: /* D1 vblank/vline */
7833                         switch (src_data) {
7834                         case 0: /* D1 vblank */
7835                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7836                                         if (rdev->irq.crtc_vblank_int[0]) {
7837                                                 drm_handle_vblank(rdev->ddev, 0);
7838                                                 rdev->pm.vblank_sync = true;
7839                                                 wake_up(&rdev->irq.vblank_queue);
7840                                         }
7841                                         if (atomic_read(&rdev->irq.pflip[0]))
7842                                                 radeon_crtc_handle_vblank(rdev, 0);
7843                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7844                                         DRM_DEBUG("IH: D1 vblank\n");
7845                                 }
7846                                 break;
7847                         case 1: /* D1 vline */
7848                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7849                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7850                                         DRM_DEBUG("IH: D1 vline\n");
7851                                 }
7852                                 break;
7853                         default:
7854                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7855                                 break;
7856                         }
7857                         break;
7858                 case 2: /* D2 vblank/vline */
7859                         switch (src_data) {
7860                         case 0: /* D2 vblank */
7861                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7862                                         if (rdev->irq.crtc_vblank_int[1]) {
7863                                                 drm_handle_vblank(rdev->ddev, 1);
7864                                                 rdev->pm.vblank_sync = true;
7865                                                 wake_up(&rdev->irq.vblank_queue);
7866                                         }
7867                                         if (atomic_read(&rdev->irq.pflip[1]))
7868                                                 radeon_crtc_handle_vblank(rdev, 1);
7869                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7870                                         DRM_DEBUG("IH: D2 vblank\n");
7871                                 }
7872                                 break;
7873                         case 1: /* D2 vline */
7874                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7875                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7876                                         DRM_DEBUG("IH: D2 vline\n");
7877                                 }
7878                                 break;
7879                         default:
7880                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7881                                 break;
7882                         }
7883                         break;
7884                 case 3: /* D3 vblank/vline */
7885                         switch (src_data) {
7886                         case 0: /* D3 vblank */
7887                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7888                                         if (rdev->irq.crtc_vblank_int[2]) {
7889                                                 drm_handle_vblank(rdev->ddev, 2);
7890                                                 rdev->pm.vblank_sync = true;
7891                                                 wake_up(&rdev->irq.vblank_queue);
7892                                         }
7893                                         if (atomic_read(&rdev->irq.pflip[2]))
7894                                                 radeon_crtc_handle_vblank(rdev, 2);
7895                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7896                                         DRM_DEBUG("IH: D3 vblank\n");
7897                                 }
7898                                 break;
7899                         case 1: /* D3 vline */
7900                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7901                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7902                                         DRM_DEBUG("IH: D3 vline\n");
7903                                 }
7904                                 break;
7905                         default:
7906                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7907                                 break;
7908                         }
7909                         break;
7910                 case 4: /* D4 vblank/vline */
7911                         switch (src_data) {
7912                         case 0: /* D4 vblank */
7913                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7914                                         if (rdev->irq.crtc_vblank_int[3]) {
7915                                                 drm_handle_vblank(rdev->ddev, 3);
7916                                                 rdev->pm.vblank_sync = true;
7917                                                 wake_up(&rdev->irq.vblank_queue);
7918                                         }
7919                                         if (atomic_read(&rdev->irq.pflip[3]))
7920                                                 radeon_crtc_handle_vblank(rdev, 3);
7921                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7922                                         DRM_DEBUG("IH: D4 vblank\n");
7923                                 }
7924                                 break;
7925                         case 1: /* D4 vline */
7926                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7927                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7928                                         DRM_DEBUG("IH: D4 vline\n");
7929                                 }
7930                                 break;
7931                         default:
7932                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7933                                 break;
7934                         }
7935                         break;
7936                 case 5: /* D5 vblank/vline */
7937                         switch (src_data) {
7938                         case 0: /* D5 vblank */
7939                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7940                                         if (rdev->irq.crtc_vblank_int[4]) {
7941                                                 drm_handle_vblank(rdev->ddev, 4);
7942                                                 rdev->pm.vblank_sync = true;
7943                                                 wake_up(&rdev->irq.vblank_queue);
7944                                         }
7945                                         if (atomic_read(&rdev->irq.pflip[4]))
7946                                                 radeon_crtc_handle_vblank(rdev, 4);
7947                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7948                                         DRM_DEBUG("IH: D5 vblank\n");
7949                                 }
7950                                 break;
7951                         case 1: /* D5 vline */
7952                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7953                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7954                                         DRM_DEBUG("IH: D5 vline\n");
7955                                 }
7956                                 break;
7957                         default:
7958                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7959                                 break;
7960                         }
7961                         break;
7962                 case 6: /* D6 vblank/vline */
7963                         switch (src_data) {
7964                         case 0: /* D6 vblank */
7965                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7966                                         if (rdev->irq.crtc_vblank_int[5]) {
7967                                                 drm_handle_vblank(rdev->ddev, 5);
7968                                                 rdev->pm.vblank_sync = true;
7969                                                 wake_up(&rdev->irq.vblank_queue);
7970                                         }
7971                                         if (atomic_read(&rdev->irq.pflip[5]))
7972                                                 radeon_crtc_handle_vblank(rdev, 5);
7973                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7974                                         DRM_DEBUG("IH: D6 vblank\n");
7975                                 }
7976                                 break;
7977                         case 1: /* D6 vline */
7978                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7979                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7980                                         DRM_DEBUG("IH: D6 vline\n");
7981                                 }
7982                                 break;
7983                         default:
7984                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7985                                 break;
7986                         }
7987                         break;
7988                 case 8: /* D1 page flip */
7989                 case 10: /* D2 page flip */
7990                 case 12: /* D3 page flip */
7991                 case 14: /* D4 page flip */
7992                 case 16: /* D5 page flip */
7993                 case 18: /* D6 page flip */
7994                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7995                         if (radeon_use_pflipirq > 0)
7996                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7997                         break;
7998                 case 42: /* HPD hotplug */
7999                         switch (src_data) {
8000                         case 0:
8001                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8002                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8003                                         queue_hotplug = true;
8004                                         DRM_DEBUG("IH: HPD1\n");
8005                                 }
8006                                 break;
8007                         case 1:
8008                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8009                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8010                                         queue_hotplug = true;
8011                                         DRM_DEBUG("IH: HPD2\n");
8012                                 }
8013                                 break;
8014                         case 2:
8015                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8016                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8017                                         queue_hotplug = true;
8018                                         DRM_DEBUG("IH: HPD3\n");
8019                                 }
8020                                 break;
8021                         case 3:
8022                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8023                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8024                                         queue_hotplug = true;
8025                                         DRM_DEBUG("IH: HPD4\n");
8026                                 }
8027                                 break;
8028                         case 4:
8029                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8030                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8031                                         queue_hotplug = true;
8032                                         DRM_DEBUG("IH: HPD5\n");
8033                                 }
8034                                 break;
8035                         case 5:
8036                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8037                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8038                                         queue_hotplug = true;
8039                                         DRM_DEBUG("IH: HPD6\n");
8040                                 }
8041                                 break;
8042                         default:
8043                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8044                                 break;
8045                         }
8046                         break;
8047                 case 124: /* UVD */
8048                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8049                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8050                         break;
8051                 case 146:
8052                 case 147:
8053                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8054                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8055                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8056                         /* reset addr and status */
8057                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8058                         if (addr == 0x0 && status == 0x0)
8059                                 break;
8060                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8061                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8062                                 addr);
8063                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8064                                 status);
8065                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8066                         break;
8067                 case 167: /* VCE */
8068                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8069                         switch (src_data) {
8070                         case 0:
8071                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8072                                 break;
8073                         case 1:
8074                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8075                                 break;
8076                         default:
8077                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8078                                 break;
8079                         }
8080                         break;
8081                 case 176: /* GFX RB CP_INT */
8082                 case 177: /* GFX IB CP_INT */
8083                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8084                         break;
8085                 case 181: /* CP EOP event */
8086                         DRM_DEBUG("IH: CP EOP\n");
8087                         /* XXX check the bitfield order! */
8088                         me_id = (ring_id & 0x60) >> 5;
8089                         pipe_id = (ring_id & 0x18) >> 3;
8090                         queue_id = (ring_id & 0x7) >> 0;
8091                         switch (me_id) {
8092                         case 0:
8093                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8094                                 break;
8095                         case 1:
8096                         case 2:
8097                         if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8098                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8099                         if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8100                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8101                                 break;
8102                         }
8103                         break;
8104                 case 184: /* CP Privileged reg access */
8105                         DRM_ERROR("Illegal register access in command stream\n");
8106                         /* XXX check the bitfield order! */
8107                         me_id = (ring_id & 0x60) >> 5;
8108                         pipe_id = (ring_id & 0x18) >> 3;
8109                         queue_id = (ring_id & 0x7) >> 0;
8110                         switch (me_id) {
8111                         case 0:
8112                                 /* This results in a full GPU reset, but all we need to do is soft
8113                                  * reset the CP for gfx
8114                                  */
8115                                 queue_reset = true;
8116                                 break;
8117                         case 1:
8118                                 /* XXX compute */
8119                                 queue_reset = true;
8120                                 break;
8121                         case 2:
8122                                 /* XXX compute */
8123                                 queue_reset = true;
8124                                 break;
8125                         }
8126                         break;
8127                 case 185: /* CP Privileged inst */
8128                         DRM_ERROR("Illegal instruction in command stream\n");
8129                         /* XXX check the bitfield order! */
8130                         me_id = (ring_id & 0x60) >> 5;
8131                         pipe_id = (ring_id & 0x18) >> 3;
8132                         queue_id = (ring_id & 0x7) >> 0;
8133                         switch (me_id) {
8134                         case 0:
8135                                 /* This results in a full GPU reset, but all we need to do is soft
8136                                  * reset the CP for gfx
8137                                  */
8138                                 queue_reset = true;
8139                                 break;
8140                         case 1:
8141                                 /* XXX compute */
8142                                 queue_reset = true;
8143                                 break;
8144                         case 2:
8145                                 /* XXX compute */
8146                                 queue_reset = true;
8147                                 break;
8148                         }
8149                         break;
8150                 case 224: /* SDMA trap event */
8151                         /* XXX check the bitfield order! */
8152                         me_id = (ring_id & 0x3) >> 0;
8153                         queue_id = (ring_id & 0xc) >> 2;
8154                         DRM_DEBUG("IH: SDMA trap\n");
8155                         switch (me_id) {
8156                         case 0:
8157                                 switch (queue_id) {
8158                                 case 0:
8159                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8160                                         break;
8161                                 case 1:
8162                                         /* XXX compute */
8163                                         break;
8164                                 case 2:
8165                                         /* XXX compute */
8166                                         break;
8167                                 }
8168                                 break;
8169                         case 1:
8170                                 switch (queue_id) {
8171                                 case 0:
8172                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8173                                         break;
8174                                 case 1:
8175                                         /* XXX compute */
8176                                         break;
8177                                 case 2:
8178                                         /* XXX compute */
8179                                         break;
8180                                 }
8181                                 break;
8182                         }
8183                         break;
8184                 case 230: /* thermal low to high */
8185                         DRM_DEBUG("IH: thermal low to high\n");
8186                         rdev->pm.dpm.thermal.high_to_low = false;
8187                         queue_thermal = true;
8188                         break;
8189                 case 231: /* thermal high to low */
8190                         DRM_DEBUG("IH: thermal high to low\n");
8191                         rdev->pm.dpm.thermal.high_to_low = true;
8192                         queue_thermal = true;
8193                         break;
8194                 case 233: /* GUI IDLE */
8195                         DRM_DEBUG("IH: GUI idle\n");
8196                         break;
8197                 case 241: /* SDMA Privileged inst */
8198                 case 247: /* SDMA Privileged inst */
8199                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8200                         /* XXX check the bitfield order! */
8201                         me_id = (ring_id & 0x3) >> 0;
8202                         queue_id = (ring_id & 0xc) >> 2;
8203                         switch (me_id) {
8204                         case 0:
8205                                 switch (queue_id) {
8206                                 case 0:
8207                                         queue_reset = true;
8208                                         break;
8209                                 case 1:
8210                                         /* XXX compute */
8211                                         queue_reset = true;
8212                                         break;
8213                                 case 2:
8214                                         /* XXX compute */
8215                                         queue_reset = true;
8216                                         break;
8217                                 }
8218                                 break;
8219                         case 1:
8220                                 switch (queue_id) {
8221                                 case 0:
8222                                         queue_reset = true;
8223                                         break;
8224                                 case 1:
8225                                         /* XXX compute */
8226                                         queue_reset = true;
8227                                         break;
8228                                 case 2:
8229                                         /* XXX compute */
8230                                         queue_reset = true;
8231                                         break;
8232                                 }
8233                                 break;
8234                         }
8235                         break;
8236                 default:
8237                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8238                         break;
8239                 }
8240
8241                 /* wptr/rptr are in bytes! */
8242                 rptr += 16;
8243                 rptr &= rdev->ih.ptr_mask;
8244         }
8245         if (queue_hotplug)
8246                 schedule_work(&rdev->hotplug_work);
8247         if (queue_reset)
8248                 schedule_work(&rdev->reset_work);
8249         if (queue_thermal)
8250                 schedule_work(&rdev->pm.dpm.thermal.work);
8251         rdev->ih.rptr = rptr;
8252         WREG32(IH_RB_RPTR, rdev->ih.rptr);
8253         atomic_set(&rdev->ih.lock, 0);
8254
8255         /* make sure wptr hasn't changed while processing */
8256         wptr = cik_get_ih_wptr(rdev);
8257         if (wptr != rptr)
8258                 goto restart_ih;
8259
8260         return IRQ_HANDLED;
8261 }
8262
8263 /*
8264  * startup/shutdown callbacks
8265  */
8266 /**
8267  * cik_startup - program the asic to a functional state
8268  *
8269  * @rdev: radeon_device pointer
8270  *
8271  * Programs the asic to a functional state (CIK).
8272  * Called by cik_init() and cik_resume().
8273  * Returns 0 for success, error for failure.
8274  */
8275 static int cik_startup(struct radeon_device *rdev)
8276 {
8277         struct radeon_ring *ring;
8278         u32 nop;
8279         int r;
8280
8281         /* enable pcie gen2/3 link */
8282         cik_pcie_gen3_enable(rdev);
8283         /* enable aspm */
8284         cik_program_aspm(rdev);
8285
8286         /* scratch needs to be initialized before MC */
8287         r = r600_vram_scratch_init(rdev);
8288         if (r)
8289                 return r;
8290
8291         cik_mc_program(rdev);
8292
8293         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8294                 r = ci_mc_load_microcode(rdev);
8295                 if (r) {
8296                         DRM_ERROR("Failed to load MC firmware!\n");
8297                         return r;
8298                 }
8299         }
8300
8301         r = cik_pcie_gart_enable(rdev);
8302         if (r)
8303                 return r;
8304         cik_gpu_init(rdev);
8305
8306         /* allocate rlc buffers */
8307         if (rdev->flags & RADEON_IS_IGP) {
8308                 if (rdev->family == CHIP_KAVERI) {
8309                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8310                         rdev->rlc.reg_list_size =
8311                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8312                 } else {
8313                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8314                         rdev->rlc.reg_list_size =
8315                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8316                 }
8317         }
8318         rdev->rlc.cs_data = ci_cs_data;
8319         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8320         r = sumo_rlc_init(rdev);
8321         if (r) {
8322                 DRM_ERROR("Failed to init rlc BOs!\n");
8323                 return r;
8324         }
8325
8326         /* allocate wb buffer */
8327         r = radeon_wb_init(rdev);
8328         if (r)
8329                 return r;
8330
8331         /* allocate mec buffers */
8332         r = cik_mec_init(rdev);
8333         if (r) {
8334                 DRM_ERROR("Failed to init MEC BOs!\n");
8335                 return r;
8336         }
8337
8338         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8339         if (r) {
8340                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8341                 return r;
8342         }
8343
8344         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8345         if (r) {
8346                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8347                 return r;
8348         }
8349
8350         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8351         if (r) {
8352                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8353                 return r;
8354         }
8355
8356         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8357         if (r) {
8358                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8359                 return r;
8360         }
8361
8362         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8363         if (r) {
8364                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8365                 return r;
8366         }
8367
8368         r = radeon_uvd_resume(rdev);
8369         if (!r) {
8370                 r = uvd_v4_2_resume(rdev);
8371                 if (!r) {
8372                         r = radeon_fence_driver_start_ring(rdev,
8373                                                            R600_RING_TYPE_UVD_INDEX);
8374                         if (r)
8375                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8376                 }
8377         }
8378         if (r)
8379                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8380
8381         r = radeon_vce_resume(rdev);
8382         if (!r) {
8383                 r = vce_v2_0_resume(rdev);
8384                 if (!r)
8385                         r = radeon_fence_driver_start_ring(rdev,
8386                                                            TN_RING_TYPE_VCE1_INDEX);
8387                 if (!r)
8388                         r = radeon_fence_driver_start_ring(rdev,
8389                                                            TN_RING_TYPE_VCE2_INDEX);
8390         }
8391         if (r) {
8392                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8393                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8394                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8395         }
8396
8397         /* Enable IRQ */
8398         if (!rdev->irq.installed) {
8399                 r = radeon_irq_kms_init(rdev);
8400                 if (r)
8401                         return r;
8402         }
8403
8404         r = cik_irq_init(rdev);
8405         if (r) {
8406                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8407                 radeon_irq_kms_fini(rdev);
8408                 return r;
8409         }
8410         cik_irq_set(rdev);
8411
8412         if (rdev->family == CHIP_HAWAII) {
8413                 if (rdev->new_fw)
8414                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8415                 else
8416                         nop = RADEON_CP_PACKET2;
8417         } else {
8418                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8419         }
8420
8421         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8422         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8423                              nop);
8424         if (r)
8425                 return r;
8426
8427         /* set up the compute queues */
8428         /* type-2 packets are deprecated on MEC, use type-3 instead */
8429         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8430         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8431                              nop);
8432         if (r)
8433                 return r;
8434         ring->me = 1; /* first MEC */
8435         ring->pipe = 0; /* first pipe */
8436         ring->queue = 0; /* first queue */
8437         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8438
8439         /* type-2 packets are deprecated on MEC, use type-3 instead */
8440         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8441         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8442                              nop);
8443         if (r)
8444                 return r;
8445         /* dGPUs only have 1 MEC */
8446         ring->me = 1; /* first MEC */
8447         ring->pipe = 0; /* first pipe */
8448         ring->queue = 1; /* second queue */
8449         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8450
8451         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8452         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8453                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8454         if (r)
8455                 return r;
8456
8457         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8458         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8459                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8460         if (r)
8461                 return r;
8462
8463         r = cik_cp_resume(rdev);
8464         if (r)
8465                 return r;
8466
8467         r = cik_sdma_resume(rdev);
8468         if (r)
8469                 return r;
8470
8471         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8472         if (ring->ring_size) {
8473                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8474                                      RADEON_CP_PACKET2);
8475                 if (!r)
8476                         r = uvd_v1_0_init(rdev);
8477                 if (r)
8478                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8479         }
8480
8481         r = -ENOENT;
8482
8483         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8484         if (ring->ring_size)
8485                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8486                                      VCE_CMD_NO_OP);
8487
8488         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8489         if (ring->ring_size)
8490                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8491                                      VCE_CMD_NO_OP);
8492
8493         if (!r)
8494                 r = vce_v1_0_init(rdev);
8495         else if (r != -ENOENT)
8496                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8497
8498         r = radeon_ib_pool_init(rdev);
8499         if (r) {
8500                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8501                 return r;
8502         }
8503
8504         r = radeon_vm_manager_init(rdev);
8505         if (r) {
8506                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8507                 return r;
8508         }
8509
8510         r = dce6_audio_init(rdev);
8511         if (r)
8512                 return r;
8513
8514         return 0;
8515 }
8516
8517 /**
8518  * cik_resume - resume the asic to a functional state
8519  *
8520  * @rdev: radeon_device pointer
8521  *
8522  * Programs the asic to a functional state (CIK).
8523  * Called at resume.
8524  * Returns 0 for success, error for failure.
8525  */
8526 int cik_resume(struct radeon_device *rdev)
8527 {
8528         int r;
8529
8530         /* post card */
8531         atom_asic_init(rdev->mode_info.atom_context);
8532
8533         /* init golden registers */
8534         cik_init_golden_registers(rdev);
8535
8536         if (rdev->pm.pm_method == PM_METHOD_DPM)
8537                 radeon_pm_resume(rdev);
8538
8539         rdev->accel_working = true;
8540         r = cik_startup(rdev);
8541         if (r) {
8542                 DRM_ERROR("cik startup failed on resume\n");
8543                 rdev->accel_working = false;
8544                 return r;
8545         }
8546
8547         return r;
8548
8549 }
8550
8551 /**
8552  * cik_suspend - suspend the asic
8553  *
8554  * @rdev: radeon_device pointer
8555  *
8556  * Bring the chip into a state suitable for suspend (CIK).
8557  * Called at suspend.
8558  * Returns 0 for success.
8559  */
8560 int cik_suspend(struct radeon_device *rdev)
8561 {
8562         radeon_pm_suspend(rdev);
8563         dce6_audio_fini(rdev);
8564         radeon_vm_manager_fini(rdev);
8565         cik_cp_enable(rdev, false);
8566         cik_sdma_enable(rdev, false);
8567         uvd_v1_0_fini(rdev);
8568         radeon_uvd_suspend(rdev);
8569         radeon_vce_suspend(rdev);
8570         cik_fini_pg(rdev);
8571         cik_fini_cg(rdev);
8572         cik_irq_suspend(rdev);
8573         radeon_wb_disable(rdev);
8574         cik_pcie_gart_disable(rdev);
8575         return 0;
8576 }
8577
8578 /* The plan is to move initialization into this function and use
8579  * helper functions so that radeon_device_init does pretty much
8580  * nothing more than call asic specific functions. This should
8581  * also allow us to remove a bunch of callback functions
8582  * like vram_info.
8583  */
8584 /**
8585  * cik_init - asic specific driver and hw init
8586  *
8587  * @rdev: radeon_device pointer
8588  *
8589  * Setup asic specific driver variables and program the hw
8590  * to a functional state (CIK).
8591  * Called at driver startup.
8592  * Returns 0 for success, errors for failure.
8593  */
8594 int cik_init(struct radeon_device *rdev)
8595 {
8596         struct radeon_ring *ring;
8597         int r;
8598
8599         /* Read BIOS */
8600         if (!radeon_get_bios(rdev)) {
8601                 if (ASIC_IS_AVIVO(rdev))
8602                         return -EINVAL;
8603         }
8604         /* Must be an ATOMBIOS */
8605         if (!rdev->is_atom_bios) {
8606                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8607                 return -EINVAL;
8608         }
8609         r = radeon_atombios_init(rdev);
8610         if (r)
8611                 return r;
8612
8613         /* Post card if necessary */
8614         if (!radeon_card_posted(rdev)) {
8615                 if (!rdev->bios) {
8616                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8617                         return -EINVAL;
8618                 }
8619                 DRM_INFO("GPU not posted. posting now...\n");
8620                 atom_asic_init(rdev->mode_info.atom_context);
8621         }
8622         /* init golden registers */
8623         cik_init_golden_registers(rdev);
8624         /* Initialize scratch registers */
8625         cik_scratch_init(rdev);
8626         /* Initialize surface registers */
8627         radeon_surface_init(rdev);
8628         /* Initialize clocks */
8629         radeon_get_clock_info(rdev->ddev);
8630
8631         /* Fence driver */
8632         r = radeon_fence_driver_init(rdev);
8633         if (r)
8634                 return r;
8635
8636         /* initialize memory controller */
8637         r = cik_mc_init(rdev);
8638         if (r)
8639                 return r;
8640         /* Memory manager */
8641         r = radeon_bo_init(rdev);
8642         if (r)
8643                 return r;
8644
8645         if (rdev->flags & RADEON_IS_IGP) {
8646                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8647                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8648                         r = cik_init_microcode(rdev);
8649                         if (r) {
8650                                 DRM_ERROR("Failed to load firmware!\n");
8651                                 return r;
8652                         }
8653                 }
8654         } else {
8655                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8656                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8657                     !rdev->mc_fw) {
8658                         r = cik_init_microcode(rdev);
8659                         if (r) {
8660                                 DRM_ERROR("Failed to load firmware!\n");
8661                                 return r;
8662                         }
8663                 }
8664         }
8665
8666         /* Initialize power management */
8667         radeon_pm_init(rdev);
8668
8669         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8670         ring->ring_obj = NULL;
8671         r600_ring_init(rdev, ring, 1024 * 1024);
8672
8673         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8674         ring->ring_obj = NULL;
8675         r600_ring_init(rdev, ring, 1024 * 1024);
8676         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8677         if (r)
8678                 return r;
8679
8680         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8681         ring->ring_obj = NULL;
8682         r600_ring_init(rdev, ring, 1024 * 1024);
8683         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8684         if (r)
8685                 return r;
8686
8687         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8688         ring->ring_obj = NULL;
8689         r600_ring_init(rdev, ring, 256 * 1024);
8690
8691         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8692         ring->ring_obj = NULL;
8693         r600_ring_init(rdev, ring, 256 * 1024);
8694
8695         r = radeon_uvd_init(rdev);
8696         if (!r) {
8697                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8698                 ring->ring_obj = NULL;
8699                 r600_ring_init(rdev, ring, 4096);
8700         }
8701
8702         r = radeon_vce_init(rdev);
8703         if (!r) {
8704                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8705                 ring->ring_obj = NULL;
8706                 r600_ring_init(rdev, ring, 4096);
8707
8708                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8709                 ring->ring_obj = NULL;
8710                 r600_ring_init(rdev, ring, 4096);
8711         }
8712
8713         rdev->ih.ring_obj = NULL;
8714         r600_ih_ring_init(rdev, 64 * 1024);
8715
8716         r = r600_pcie_gart_init(rdev);
8717         if (r)
8718                 return r;
8719
8720         rdev->accel_working = true;
8721         r = cik_startup(rdev);
8722         if (r) {
8723                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8724                 cik_cp_fini(rdev);
8725                 cik_sdma_fini(rdev);
8726                 cik_irq_fini(rdev);
8727                 sumo_rlc_fini(rdev);
8728                 cik_mec_fini(rdev);
8729                 radeon_wb_fini(rdev);
8730                 radeon_ib_pool_fini(rdev);
8731                 radeon_vm_manager_fini(rdev);
8732                 radeon_irq_kms_fini(rdev);
8733                 cik_pcie_gart_fini(rdev);
8734                 rdev->accel_working = false;
8735         }
8736
8737         /* Don't start up if the MC ucode is missing.
8738          * The default clocks and voltages before the MC ucode
8739          * is loaded are not sufficient for advanced operations.
8740          */
8741         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8742                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8743                 return -EINVAL;
8744         }
8745
8746         return 0;
8747 }
8748
8749 /**
8750  * cik_fini - asic specific driver and hw fini
8751  *
8752  * @rdev: radeon_device pointer
8753  *
8754  * Tear down the asic specific driver variables and program the hw
8755  * to an idle state (CIK).
8756  * Called at driver unload.
8757  */
8758 void cik_fini(struct radeon_device *rdev)
8759 {
8760         radeon_pm_fini(rdev);
8761         cik_cp_fini(rdev);
8762         cik_sdma_fini(rdev);
8763         cik_fini_pg(rdev);
8764         cik_fini_cg(rdev);
8765         cik_irq_fini(rdev);
8766         sumo_rlc_fini(rdev);
8767         cik_mec_fini(rdev);
8768         radeon_wb_fini(rdev);
8769         radeon_vm_manager_fini(rdev);
8770         radeon_ib_pool_fini(rdev);
8771         radeon_irq_kms_fini(rdev);
8772         uvd_v1_0_fini(rdev);
8773         radeon_uvd_fini(rdev);
8774         radeon_vce_fini(rdev);
8775         cik_pcie_gart_fini(rdev);
8776         r600_vram_scratch_fini(rdev);
8777         radeon_gem_fini(rdev);
8778         radeon_fence_driver_fini(rdev);
8779         radeon_bo_fini(rdev);
8780         radeon_atombios_fini(rdev);
8781         kfree(rdev->bios);
8782         rdev->bios = NULL;
8783 }
8784
8785 void dce8_program_fmt(struct drm_encoder *encoder)
8786 {
8787         struct drm_device *dev = encoder->dev;
8788         struct radeon_device *rdev = dev->dev_private;
8789         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8790         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8791         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8792         int bpc = 0;
8793         u32 tmp = 0;
8794         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8795
8796         if (connector) {
8797                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8798                 bpc = radeon_get_monitor_bpc(connector);
8799                 dither = radeon_connector->dither;
8800         }
8801
8802         /* LVDS/eDP FMT is set up by atom */
8803         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8804                 return;
8805
8806         /* not needed for analog */
8807         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8808             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8809                 return;
8810
8811         if (bpc == 0)
8812                 return;
8813
8814         switch (bpc) {
8815         case 6:
8816                 if (dither == RADEON_FMT_DITHER_ENABLE)
8817                         /* XXX sort out optimal dither settings */
8818                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8819                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8820                 else
8821                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8822                 break;
8823         case 8:
8824                 if (dither == RADEON_FMT_DITHER_ENABLE)
8825                         /* XXX sort out optimal dither settings */
8826                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8827                                 FMT_RGB_RANDOM_ENABLE |
8828                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8829                 else
8830                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8831                 break;
8832         case 10:
8833                 if (dither == RADEON_FMT_DITHER_ENABLE)
8834                         /* XXX sort out optimal dither settings */
8835                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8836                                 FMT_RGB_RANDOM_ENABLE |
8837                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8838                 else
8839                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8840                 break;
8841         default:
8842                 /* not needed */
8843                 break;
8844         }
8845
8846         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8847 }
8848
8849 /* display watermark setup */
8850 /**
8851  * dce8_line_buffer_adjust - Set up the line buffer
8852  *
8853  * @rdev: radeon_device pointer
8854  * @radeon_crtc: the selected display controller
8855  * @mode: the current display mode on the selected display
8856  * controller
8857  *
8858  * Set up the line buffer allocation for
8859  * the selected display controller (CIK).
8860  * Returns the line buffer size in pixels.
8861  */
8862 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8863                                    struct radeon_crtc *radeon_crtc,
8864                                    struct drm_display_mode *mode)
8865 {
8866         u32 tmp, buffer_alloc, i;
8867         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8868         /*
8869          * Line Buffer Setup
8870          * There are 6 line buffers, one for each display controller.
8871          * There are 3 partitions per LB. Select the number of partitions
8872          * to enable based on the display width.  For display widths larger
8873          * than 4096, you need to use 2 display controllers and combine
8874          * them using the stereo blender.
8875          */
8876         if (radeon_crtc->base.enabled && mode) {
8877                 if (mode->crtc_hdisplay < 1920) {
8878                         tmp = 1;
8879                         buffer_alloc = 2;
8880                 } else if (mode->crtc_hdisplay < 2560) {
8881                         tmp = 2;
8882                         buffer_alloc = 2;
8883                 } else if (mode->crtc_hdisplay < 4096) {
8884                         tmp = 0;
8885                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8886                 } else {
8887                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8888                         tmp = 0;
8889                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8890                 }
8891         } else {
8892                 tmp = 1;
8893                 buffer_alloc = 0;
8894         }
8895
8896         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8897                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8898
8899         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8900                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8901         for (i = 0; i < rdev->usec_timeout; i++) {
8902                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8903                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8904                         break;
8905                 udelay(1);
8906         }
8907
8908         if (radeon_crtc->base.enabled && mode) {
8909                 switch (tmp) {
8910                 case 0:
8911                 default:
8912                         return 4096 * 2;
8913                 case 1:
8914                         return 1920 * 2;
8915                 case 2:
8916                         return 2560 * 2;
8917                 }
8918         }
8919
8920         /* controller not enabled, so no lb used */
8921         return 0;
8922 }
8923
8924 /**
8925  * cik_get_number_of_dram_channels - get the number of dram channels
8926  *
8927  * @rdev: radeon_device pointer
8928  *
8929  * Look up the number of video ram channels (CIK).
8930  * Used for display watermark bandwidth calculations
8931  * Returns the number of dram channels
8932  */
8933 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8934 {
8935         u32 tmp = RREG32(MC_SHARED_CHMAP);
8936
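             /* NOOFCHAN is an encoded field, not a raw channel count;
              * map each encoding to the actual number of dram channels.
              */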
8937         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8938         case 0:
8939         default:
8940                 return 1;
8941         case 1:
8942                 return 2;
8943         case 2:
8944                 return 4;
8945         case 3:
8946                 return 8;
8947         case 4:
8948                 return 3;
8949         case 5:
8950                 return 6;
8951         case 6:
8952                 return 10;
8953         case 7:
8954                 return 12;
8955         case 8:
8956                 return 16;
8957         }
8958 }
8959
8960 struct dce8_wm_params {
8961         u32 dram_channels; /* number of dram channels */
8962         u32 yclk;          /* bandwidth per dram data pin in kHz */
8963         u32 sclk;          /* engine clock in kHz */
8964         u32 disp_clk;      /* display clock in kHz */
8965         u32 src_width;     /* viewport width */
8966         u32 active_time;   /* active display time in ns */
8967         u32 blank_time;    /* blank time in ns */
8968         bool interlaced;    /* mode is interlaced */
8969         fixed20_12 vsc;    /* vertical scale ratio */
8970         u32 num_heads;     /* number of active crtcs */
8971         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8972         u32 lb_size;       /* line buffer allocated to pipe */
8973         u32 vtaps;         /* vertical scaler taps */
8974 };
8975
8976 /**
8977  * dce8_dram_bandwidth - get the dram bandwidth
8978  *
8979  * @wm: watermark calculation data
8980  *
8981  * Calculate the raw dram bandwidth (CIK).
8982  * Used for display watermark bandwidth calculations
8983  * Returns the dram bandwidth in MBytes/s
8984  */
8985 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8986 {
8987         /* Calculate raw DRAM Bandwidth */
8988         fixed20_12 dram_efficiency; /* 0.7 */
8989         fixed20_12 yclk, dram_channels, bandwidth;
8990         fixed20_12 a;
8991
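             /* raw bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7 (dram efficiency) */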
8992         a.full = dfixed_const(1000);
8993         yclk.full = dfixed_const(wm->yclk);
8994         yclk.full = dfixed_div(yclk, a);
8995         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8996         a.full = dfixed_const(10);
8997         dram_efficiency.full = dfixed_const(7);
8998         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8999         bandwidth.full = dfixed_mul(dram_channels, yclk);
9000         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9001
9002         return dfixed_trunc(bandwidth);
9003 }
9004
9005 /**
9006  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9007  *
9008  * @wm: watermark calculation data
9009  *
9010  * Calculate the dram bandwidth used for display (CIK).
9011  * Used for display watermark bandwidth calculations
9012  * Returns the dram bandwidth for display in MBytes/s
9013  */
9014 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9015 {
9016         /* Calculate DRAM Bandwidth and the part allocated to display. */
9017         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9018         fixed20_12 yclk, dram_channels, bandwidth;
9019         fixed20_12 a;
9020
9021         a.full = dfixed_const(1000);
9022         yclk.full = dfixed_const(wm->yclk);
9023         yclk.full = dfixed_div(yclk, a);
9024         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9025         a.full = dfixed_const(10);
9026         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9027         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9028         bandwidth.full = dfixed_mul(dram_channels, yclk);
9029         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9030
9031         return dfixed_trunc(bandwidth);
9032 }
9033
9034 /**
9035  * dce8_data_return_bandwidth - get the data return bandwidth
9036  *
9037  * @wm: watermark calculation data
9038  *
9039  * Calculate the data return bandwidth used for display (CIK).
9040  * Used for display watermark bandwidth calculations
9041  * Returns the data return bandwidth in MBytes/s
9042  */
9043 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9044 {
9045         /* Calculate the display Data return Bandwidth */
9046         fixed20_12 return_efficiency; /* 0.8 */
9047         fixed20_12 sclk, bandwidth;
9048         fixed20_12 a;
9049
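             /* data return bandwidth = (sclk / 1000) * 32 * 0.8 (return efficiency) */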
9050         a.full = dfixed_const(1000);
9051         sclk.full = dfixed_const(wm->sclk);
9052         sclk.full = dfixed_div(sclk, a);
9053         a.full = dfixed_const(10);
9054         return_efficiency.full = dfixed_const(8);
9055         return_efficiency.full = dfixed_div(return_efficiency, a);
9056         a.full = dfixed_const(32);
9057         bandwidth.full = dfixed_mul(a, sclk);
9058         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9059
9060         return dfixed_trunc(bandwidth);
9061 }
9062
9063 /**
9064  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9065  *
9066  * @wm: watermark calculation data
9067  *
9068  * Calculate the dmif bandwidth used for display (CIK).
9069  * Used for display watermark bandwidth calculations
9070  * Returns the dmif bandwidth in MBytes/s
9071  */
9072 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9073 {
9074         /* Calculate the DMIF Request Bandwidth */
9075         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9076         fixed20_12 disp_clk, bandwidth;
9077         fixed20_12 a, b;
9078
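             /* dmif request bandwidth = (disp_clk / 1000) * 32 * 0.8 (request efficiency) */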
9079         a.full = dfixed_const(1000);
9080         disp_clk.full = dfixed_const(wm->disp_clk);
9081         disp_clk.full = dfixed_div(disp_clk, a);
9082         a.full = dfixed_const(32);
9083         b.full = dfixed_mul(a, disp_clk);
9084
9085         a.full = dfixed_const(10);
9086         disp_clk_request_efficiency.full = dfixed_const(8);
9087         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9088
9089         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9090
9091         return dfixed_trunc(bandwidth);
9092 }
9093
9094 /**
9095  * dce8_available_bandwidth - get the min available bandwidth
9096  *
9097  * @wm: watermark calculation data
9098  *
9099  * Calculate the min available bandwidth used for display (CIK).
9100  * Used for display watermark bandwidth calculations
9101  * Returns the min available bandwidth in MBytes/s
9102  */
9103 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9104 {
9105         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9106         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9107         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9108         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9109
9110         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9111 }
9112
9113 /**
9114  * dce8_average_bandwidth - get the average available bandwidth
9115  *
9116  * @wm: watermark calculation data
9117  *
9118  * Calculate the average available bandwidth used for display (CIK).
9119  * Used for display watermark bandwidth calculations
9120  * Returns the average available bandwidth in MBytes/s
9121  */
9122 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9123 {
9124         /* Calculate the display mode Average Bandwidth
9125          * DisplayMode should contain the source and destination dimensions,
9126          * timing, etc.
9127          */
9128         fixed20_12 bpp;
9129         fixed20_12 line_time;
9130         fixed20_12 src_width;
9131         fixed20_12 bandwidth;
9132         fixed20_12 a;
9133
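             /* average bandwidth = src_width * bytes_per_pixel * vsc / line_time */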
9134         a.full = dfixed_const(1000);
9135         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9136         line_time.full = dfixed_div(line_time, a);
9137         bpp.full = dfixed_const(wm->bytes_per_pixel);
9138         src_width.full = dfixed_const(wm->src_width);
9139         bandwidth.full = dfixed_mul(src_width, bpp);
9140         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9141         bandwidth.full = dfixed_div(bandwidth, line_time);
9142
9143         return dfixed_trunc(bandwidth);
9144 }
9145
9146 /**
9147  * dce8_latency_watermark - get the latency watermark
9148  *
9149  * @wm: watermark calculation data
9150  *
9151  * Calculate the latency watermark (CIK).
9152  * Used for display watermark bandwidth calculations
9153  * Returns the latency watermark in ns
9154  */
9155 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9156 {
9157         /* First calculate the latency in ns */
9158         u32 mc_latency = 2000; /* 2000 ns. */
9159         u32 available_bandwidth = dce8_available_bandwidth(wm);
9160         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9161         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9162         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9163         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9164                 (wm->num_heads * cursor_line_pair_return_time);
9165         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9166         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9167         u32 tmp, dmif_size = 12288;
9168         fixed20_12 a, b, c;
9169
9170         if (wm->num_heads == 0)
9171                 return 0;
9172
9173         a.full = dfixed_const(2);
9174         b.full = dfixed_const(1);
9175         if ((wm->vsc.full > a.full) ||
9176             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9177             (wm->vtaps >= 5) ||
9178             ((wm->vsc.full >= a.full) && wm->interlaced))
9179                 max_src_lines_per_dst_line = 4;
9180         else
9181                 max_src_lines_per_dst_line = 2;
9182
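             /* lb_fill_bw is the smallest of: the per-head share of the
              * available bandwidth, dmif_size / ((mc_latency + 512) / disp_clk),
              * and disp_clk * bytes_per_pixel.
              */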
9183         a.full = dfixed_const(available_bandwidth);
9184         b.full = dfixed_const(wm->num_heads);
9185         a.full = dfixed_div(a, b);
9186
9187         b.full = dfixed_const(mc_latency + 512);
9188         c.full = dfixed_const(wm->disp_clk);
9189         b.full = dfixed_div(b, c);
9190
9191         c.full = dfixed_const(dmif_size);
9192         b.full = dfixed_div(c, b);
9193
9194         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9195
9196         b.full = dfixed_const(1000);
9197         c.full = dfixed_const(wm->disp_clk);
9198         b.full = dfixed_div(c, b);
9199         c.full = dfixed_const(wm->bytes_per_pixel);
9200         b.full = dfixed_mul(b, c);
9201
9202         lb_fill_bw = min(tmp, dfixed_trunc(b));
9203
9204         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9205         b.full = dfixed_const(1000);
9206         c.full = dfixed_const(lb_fill_bw);
9207         b.full = dfixed_div(c, b);
9208         a.full = dfixed_div(a, b);
9209         line_fill_time = dfixed_trunc(a);
9210
9211         if (line_fill_time < wm->active_time)
9212                 return latency;
9213         else
9214                 return latency + (line_fill_time - wm->active_time);
9215
9216 }
9217
9218 /**
9219  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9220  * average and available dram bandwidth
9221  *
9222  * @wm: watermark calculation data
9223  *
9224  * Check if the display average bandwidth fits in the display
9225  * dram bandwidth (CIK).
9226  * Used for display watermark bandwidth calculations
9227  * Returns true if the display fits, false if not.
9228  */
9229 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9230 {
9231         if (dce8_average_bandwidth(wm) <=
9232             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9233                 return true;
9234         else
9235                 return false;
9236 }
9237
9238 /**
9239  * dce8_average_bandwidth_vs_available_bandwidth - check
9240  * average and available bandwidth
9241  *
9242  * @wm: watermark calculation data
9243  *
9244  * Check if the display average bandwidth fits in the display
9245  * available bandwidth (CIK).
9246  * Used for display watermark bandwidth calculations
9247  * Returns true if the display fits, false if not.
9248  */
9249 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9250 {
9251         if (dce8_average_bandwidth(wm) <=
9252             (dce8_available_bandwidth(wm) / wm->num_heads))
9253                 return true;
9254         else
9255                 return false;
9256 }
9257
9258 /**
9259  * dce8_check_latency_hiding - check latency hiding
9260  *
9261  * @wm: watermark calculation data
9262  *
9263  * Check latency hiding (CIK).
9264  * Used for display watermark bandwidth calculations
9265  * Returns true if the display fits, false if not.
9266  */
9267 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9268 {
9269         u32 lb_partitions = wm->lb_size / wm->src_width;
9270         u32 line_time = wm->active_time + wm->blank_time;
9271         u32 latency_tolerant_lines;
9272         u32 latency_hiding;
9273         fixed20_12 a;
9274
9275         a.full = dfixed_const(1);
9276         if (wm->vsc.full > a.full)
9277                 latency_tolerant_lines = 1;
9278         else {
9279                 if (lb_partitions <= (wm->vtaps + 1))
9280                         latency_tolerant_lines = 1;
9281                 else
9282                         latency_tolerant_lines = 2;
9283         }
9284
9285         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9286
9287         if (dce8_latency_watermark(wm) <= latency_hiding)
9288                 return true;
9289         else
9290                 return false;
9291 }
9292
9293 /**
9294  * dce8_program_watermarks - program display watermarks
9295  *
9296  * @rdev: radeon_device pointer
9297  * @radeon_crtc: the selected display controller
9298  * @lb_size: line buffer size
9299  * @num_heads: number of display controllers in use
9300  *
9301  * Calculate and program the display watermarks for the
9302  * selected display controller (CIK).
9303  */
9304 static void dce8_program_watermarks(struct radeon_device *rdev,
9305                                     struct radeon_crtc *radeon_crtc,
9306                                     u32 lb_size, u32 num_heads)
9307 {
9308         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9309         struct dce8_wm_params wm_low, wm_high;
9310         u32 pixel_period;
9311         u32 line_time = 0;
9312         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9313         u32 tmp, wm_mask;
9314
9315         if (radeon_crtc->base.enabled && num_heads && mode) {
9316                 pixel_period = 1000000 / (u32)mode->clock;
9317                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9318
9319                 /* watermark for high clocks */
9320                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9321                     rdev->pm.dpm_enabled) {
9322                         wm_high.yclk =
9323                                 radeon_dpm_get_mclk(rdev, false) * 10;
9324                         wm_high.sclk =
9325                                 radeon_dpm_get_sclk(rdev, false) * 10;
9326                 } else {
9327                         wm_high.yclk = rdev->pm.current_mclk * 10;
9328                         wm_high.sclk = rdev->pm.current_sclk * 10;
9329                 }
9330
9331                 wm_high.disp_clk = mode->clock;
9332                 wm_high.src_width = mode->crtc_hdisplay;
9333                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9334                 wm_high.blank_time = line_time - wm_high.active_time;
9335                 wm_high.interlaced = false;
9336                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9337                         wm_high.interlaced = true;
9338                 wm_high.vsc = radeon_crtc->vsc;
9339                 wm_high.vtaps = 1;
9340                 if (radeon_crtc->rmx_type != RMX_OFF)
9341                         wm_high.vtaps = 2;
9342                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9343                 wm_high.lb_size = lb_size;
9344                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9345                 wm_high.num_heads = num_heads;
9346
9347                 /* set for high clocks */
9348                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9349
9350                 /* possibly force display priority to high */
9351                 /* should really do this at mode validation time... */
9352                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9353                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9354                     !dce8_check_latency_hiding(&wm_high) ||
9355                     (rdev->disp_priority == 2)) {
9356                         DRM_DEBUG_KMS("force priority to high\n");
9357                 }
9358
9359                 /* watermark for low clocks */
9360                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9361                     rdev->pm.dpm_enabled) {
9362                         wm_low.yclk =
9363                                 radeon_dpm_get_mclk(rdev, true) * 10;
9364                         wm_low.sclk =
9365                                 radeon_dpm_get_sclk(rdev, true) * 10;
9366                 } else {
9367                         wm_low.yclk = rdev->pm.current_mclk * 10;
9368                         wm_low.sclk = rdev->pm.current_sclk * 10;
9369                 }
9370
9371                 wm_low.disp_clk = mode->clock;
9372                 wm_low.src_width = mode->crtc_hdisplay;
9373                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9374                 wm_low.blank_time = line_time - wm_low.active_time;
9375                 wm_low.interlaced = false;
9376                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9377                         wm_low.interlaced = true;
9378                 wm_low.vsc = radeon_crtc->vsc;
9379                 wm_low.vtaps = 1;
9380                 if (radeon_crtc->rmx_type != RMX_OFF)
9381                         wm_low.vtaps = 2;
9382                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9383                 wm_low.lb_size = lb_size;
9384                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9385                 wm_low.num_heads = num_heads;
9386
9387                 /* set for low clocks */
9388                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9389
9390                 /* possibly force display priority to high */
9391                 /* should really do this at mode validation time... */
9392                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9393                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9394                     !dce8_check_latency_hiding(&wm_low) ||
9395                     (rdev->disp_priority == 2)) {
9396                         DRM_DEBUG_KMS("force priority to high\n");
9397                 }
9398         }
9399
9400         /* select wm A */
9401         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9402         tmp = wm_mask;
9403         tmp &= ~LATENCY_WATERMARK_MASK(3);
9404         tmp |= LATENCY_WATERMARK_MASK(1);
9405         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9406         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9407                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9408                 LATENCY_HIGH_WATERMARK(line_time)));
9409         /* select wm B */
9410         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9411         tmp &= ~LATENCY_WATERMARK_MASK(3);
9412         tmp |= LATENCY_WATERMARK_MASK(2);
9413         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9414         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9415                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9416                 LATENCY_HIGH_WATERMARK(line_time)));
9417         /* restore original selection */
9418         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9419
9420         /* save values for DPM */
9421         radeon_crtc->line_time = line_time;
9422         radeon_crtc->wm_high = latency_watermark_a;
9423         radeon_crtc->wm_low = latency_watermark_b;
9424 }
9425
9426 /**
9427  * dce8_bandwidth_update - program display watermarks
9428  *
9429  * @rdev: radeon_device pointer
9430  *
9431  * Calculate and program the display watermarks and line
9432  * buffer allocation (CIK).
9433  */
9434 void dce8_bandwidth_update(struct radeon_device *rdev)
9435 {
9436         struct drm_display_mode *mode = NULL;
9437         u32 num_heads = 0, lb_size;
9438         int i;
9439
9440         radeon_update_display_priority(rdev);
9441
9442         for (i = 0; i < rdev->num_crtc; i++) {
9443                 if (rdev->mode_info.crtcs[i]->base.enabled)
9444                         num_heads++;
9445         }
9446         for (i = 0; i < rdev->num_crtc; i++) {
9447                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9448                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9449                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9450         }
9451 }
9452
9453 /**
9454  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9455  *
9456  * @rdev: radeon_device pointer
9457  *
9458  * Fetches a GPU clock counter snapshot (CIK).
9459  * Returns the 64 bit clock counter snapshot.
9460  */
9461 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9462 {
9463         uint64_t clock;
9464
9465         mutex_lock(&rdev->gpu_clock_mutex);
9466         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9467         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9468                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9469         mutex_unlock(&rdev->gpu_clock_mutex);
9470         return clock;
9471 }
9472
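     /**
      * cik_set_uvd_clock - program a single UVD clock
      *
      * @rdev: radeon_device pointer
      * @clock: requested clock frequency
      * @cntl_reg: clock control register (SMC address space)
      * @status_reg: clock status register (SMC address space)
      *
      * Look up the clock dividers via the atom tables, program the
      * post divider, and wait for the clock to report ready (CIK).
      * Returns 0 on success, -ETIMEDOUT if the clock never stabilizes.
      */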
9473 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9474                               u32 cntl_reg, u32 status_reg)
9475 {
9476         int r, i;
9477         struct atom_clock_dividers dividers;
9478         uint32_t tmp;
9479
9480         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9481                                            clock, false, &dividers);
9482         if (r)
9483                 return r;
9484
9485         tmp = RREG32_SMC(cntl_reg);
9486         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9487         tmp |= dividers.post_divider;
9488         WREG32_SMC(cntl_reg, tmp);
9489
9490         for (i = 0; i < 100; i++) {
9491                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9492                         break;
9493                 mdelay(10);
9494         }
9495         if (i == 100)
9496                 return -ETIMEDOUT;
9497
9498         return 0;
9499 }
9500
9501 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9502 {
9503         int r = 0;
9504
9505         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9506         if (r)
9507                 return r;
9508
9509         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9510         return r;
9511 }
9512
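     /**
      * cik_set_vce_clocks - program the VCE clocks
      *
      * @rdev: radeon_device pointer
      * @evclk: requested evclk
      * @ecclk: requested ecclk
      *
      * Wait for the ECLK to settle, program the new post divider from the
      * atom tables, then wait for the clock to report ready again (CIK).
      * Only the ecclk is programmed here; evclk is currently unused.
      * Returns 0 on success, -ETIMEDOUT on timeout.
      */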
9513 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9514 {
9515         int r, i;
9516         struct atom_clock_dividers dividers;
9517         u32 tmp;
9518
9519         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9520                                            ecclk, false, &dividers);
9521         if (r)
9522                 return r;
9523
9524         for (i = 0; i < 100; i++) {
9525                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9526                         break;
9527                 mdelay(10);
9528         }
9529         if (i == 100)
9530                 return -ETIMEDOUT;
9531
9532         tmp = RREG32_SMC(CG_ECLK_CNTL);
9533         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9534         tmp |= dividers.post_divider;
9535         WREG32_SMC(CG_ECLK_CNTL, tmp);
9536
9537         for (i = 0; i < 100; i++) {
9538                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9539                         break;
9540                 mdelay(10);
9541         }
9542         if (i == 100)
9543                 return -ETIMEDOUT;
9544
9545         return 0;
9546 }
9547
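     /**
      * cik_pcie_gen3_enable - enable higher PCIE link speeds
      *
      * @rdev: radeon_device pointer
      *
      * Attempt to bring the PCIE link up to gen2/gen3 speeds when both
      * the root port and the GPU advertise support (CIK).  Skipped for
      * IGPs and when disabled with radeon.pcie_gen2=0.
      */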
9548 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9549 {
9550         struct pci_dev *root = rdev->pdev->bus->self;
9551         int bridge_pos, gpu_pos;
9552         u32 speed_cntl, mask, current_data_rate;
9553         int ret, i;
9554         u16 tmp16;
9555
9556         if (radeon_pcie_gen2 == 0)
9557                 return;
9558
9559         if (rdev->flags & RADEON_IS_IGP)
9560                 return;
9561
9562         if (!(rdev->flags & RADEON_IS_PCIE))
9563                 return;
9564
9565         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9566         if (ret != 0)
9567                 return;
9568
9569         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9570                 return;
9571
9572         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9573         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9574                 LC_CURRENT_DATA_RATE_SHIFT;
9575         if (mask & DRM_PCIE_SPEED_80) {
9576                 if (current_data_rate == 2) {
9577                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9578                         return;
9579                 }
9580                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9581         } else if (mask & DRM_PCIE_SPEED_50) {
9582                 if (current_data_rate == 1) {
9583                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9584                         return;
9585                 }
9586                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9587         }
9588
9589         bridge_pos = pci_pcie_cap(root);
9590         if (!bridge_pos)
9591                 return;
9592
9593         gpu_pos = pci_pcie_cap(rdev->pdev);
9594         if (!gpu_pos)
9595                 return;
9596
9597         if (mask & DRM_PCIE_SPEED_80) {
9598                 /* re-try equalization if gen3 is not already enabled */
9599                 if (current_data_rate != 2) {
9600                         u16 bridge_cfg, gpu_cfg;
9601                         u16 bridge_cfg2, gpu_cfg2;
9602                         u32 max_lw, current_lw, tmp;
9603
9604                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9605                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9606
9607                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9608                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9609
9610                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9611                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9612
9613                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9614                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9615                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9616
9617                         if (current_lw < max_lw) {
9618                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9619                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9620                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9621                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9622                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9623                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9624                                 }
9625                         }
9626
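                             /* retry link equalization up to 10 times: save LNKCTL/LNKCTL2
                              * on both ends, assert quiesce and redo EQ, then restore the
                              * saved bits and release quiesce.
                              */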
9627                         for (i = 0; i < 10; i++) {
9628                                 /* check status */
9629                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9630                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9631                                         break;
9632
9633                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9634                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9635
9636                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9637                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9638
9639                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9640                                 tmp |= LC_SET_QUIESCE;
9641                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9642
9643                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9644                                 tmp |= LC_REDO_EQ;
9645                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9646
9647                                 mdelay(100);
9648
9649                                 /* linkctl */
9650                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9651                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9652                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9653                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9654
9655                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9656                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9657                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9658                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9659
9660                                 /* linkctl2 */
9661                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9662                                 tmp16 &= ~((1 << 4) | (7 << 9));
9663                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9664                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9665
9666                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9667                                 tmp16 &= ~((1 << 4) | (7 << 9));
9668                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9669                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9670
9671                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9672                                 tmp &= ~LC_SET_QUIESCE;
9673                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9674                         }
9675                 }
9676         }
9677
9678         /* set the link speed */
9679         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9680         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9681         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9682
9683         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9684         tmp16 &= ~0xf;
9685         if (mask & DRM_PCIE_SPEED_80)
9686                 tmp16 |= 3; /* gen3 */
9687         else if (mask & DRM_PCIE_SPEED_50)
9688                 tmp16 |= 2; /* gen2 */
9689         else
9690                 tmp16 |= 1; /* gen1 */
9691         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9692
9693         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9694         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9695         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9696
9697         for (i = 0; i < rdev->usec_timeout; i++) {
9698                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9699                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9700                         break;
9701                 udelay(1);
9702         }
9703 }
9704
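     /**
      * cik_program_aspm - program PCIE ASPM settings
      *
      * @rdev: radeon_device pointer
      *
      * Program PCIE active state power management (L0s/L1) and the
      * related PLL/clock power-down behavior (CIK).  Skipped for IGPs
      * and when disabled with radeon.aspm=0.
      */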
9705 static void cik_program_aspm(struct radeon_device *rdev)
9706 {
9707         u32 data, orig;
9708         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9709         bool disable_clkreq = false;
9710
9711         if (radeon_aspm == 0)
9712                 return;
9713
9714         /* XXX double check IGPs */
9715         if (rdev->flags & RADEON_IS_IGP)
9716                 return;
9717
9718         if (!(rdev->flags & RADEON_IS_PCIE))
9719                 return;
9720
9721         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9722         data &= ~LC_XMIT_N_FTS_MASK;
9723         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9724         if (orig != data)
9725                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9726
9727         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9728         data |= LC_GO_TO_RECOVERY;
9729         if (orig != data)
9730                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9731
9732         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9733         data |= P_IGNORE_EDB_ERR;
9734         if (orig != data)
9735                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9736
9737         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9738         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9739         data |= LC_PMI_TO_L1_DIS;
9740         if (!disable_l0s)
9741                 data |= LC_L0S_INACTIVITY(7);
9742
9743         if (!disable_l1) {
9744                 data |= LC_L1_INACTIVITY(7);
9745                 data &= ~LC_PMI_TO_L1_DIS;
9746                 if (orig != data)
9747                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9748
9749                 if (!disable_plloff_in_l1) {
9750                         bool clk_req_support;
9751
9752                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9753                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9754                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9755                         if (orig != data)
9756                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9757
9758                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9759                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9760                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9761                         if (orig != data)
9762                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9763
9764                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9765                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9766                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9767                         if (orig != data)
9768                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9769
9770                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9771                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9772                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9773                         if (orig != data)
9774                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9775
9776                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9777                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9778                         data |= LC_DYN_LANES_PWR_STATE(3);
9779                         if (orig != data)
9780                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9781
9782                         if (!disable_clkreq) {
9783                                 struct pci_dev *root = rdev->pdev->bus->self;
9784                                 u32 lnkcap;
9785
9786                                 clk_req_support = false;
9787                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9788                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9789                                         clk_req_support = true;
9790                         } else {
9791                                 clk_req_support = false;
9792                         }
9793
9794                         if (clk_req_support) {
9795                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9796                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9797                                 if (orig != data)
9798                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9799
9800                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9801                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9802                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9803                                 if (orig != data)
9804                                         WREG32_SMC(THM_CLK_CNTL, data);
9805
9806                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9807                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9808                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9809                                 if (orig != data)
9810                                         WREG32_SMC(MISC_CLK_CTRL, data);
9811
9812                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9813                                 data &= ~BCLK_AS_XCLK;
9814                                 if (orig != data)
9815                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9816
9817                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9818                                 data &= ~FORCE_BIF_REFCLK_EN;
9819                                 if (orig != data)
9820                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9821
9822                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9823                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9824                                 data |= MPLL_CLKOUT_SEL(4);
9825                                 if (orig != data)
9826                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9827                         }
9828                 }
9829         } else {
9830                 if (orig != data)
9831                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9832         }
9833
9834         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9835         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9836         if (orig != data)
9837                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9838
9839         if (!disable_l0s) {
9840                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9841                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9842                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9843                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9844                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9845                                 data &= ~LC_L0S_INACTIVITY_MASK;
9846                                 if (orig != data)
9847                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9848                         }
9849                 }
9850         }
9851 }