drm/radeon: rework vm_flush parameters
drivers/gpu/drm/radeon/cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 #include "radeon_kfd.h"
36
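/*
 * Both firmware naming schemes are listed below: the legacy uppercase
 * files (e.g. BONAIRE_pfp.bin) and the newer lowercase ones
 * (e.g. bonaire_pfp.bin).  The microcode loading code appears to try
 * the lowercase names first and fall back to the legacy files if they
 * are not present.
 */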
37 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
46
47 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
48 MODULE_FIRMWARE("radeon/bonaire_me.bin");
49 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
54 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
55
56 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
65
66 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
67 MODULE_FIRMWARE("radeon/hawaii_me.bin");
68 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
73 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
74
75 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
81
82 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
83 MODULE_FIRMWARE("radeon/kaveri_me.bin");
84 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
87 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
88 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
89
90 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
91 MODULE_FIRMWARE("radeon/KABINI_me.bin");
92 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
93 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
94 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
95 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
96
97 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
98 MODULE_FIRMWARE("radeon/kabini_me.bin");
99 MODULE_FIRMWARE("radeon/kabini_ce.bin");
100 MODULE_FIRMWARE("radeon/kabini_mec.bin");
101 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
102 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
103
104 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
110
111 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
112 MODULE_FIRMWARE("radeon/mullins_me.bin");
113 MODULE_FIRMWARE("radeon/mullins_ce.bin");
114 MODULE_FIRMWARE("radeon/mullins_mec.bin");
115 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
116 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
117
118 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
119 extern void r600_ih_ring_fini(struct radeon_device *rdev);
120 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
121 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
123 extern void sumo_rlc_fini(struct radeon_device *rdev);
124 extern int sumo_rlc_init(struct radeon_device *rdev);
125 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
126 extern void si_rlc_reset(struct radeon_device *rdev);
127 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
128 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
129 extern int cik_sdma_resume(struct radeon_device *rdev);
130 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
131 extern void cik_sdma_fini(struct radeon_device *rdev);
132 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
133 static void cik_rlc_stop(struct radeon_device *rdev);
134 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135 static void cik_program_aspm(struct radeon_device *rdev);
136 static void cik_init_pg(struct radeon_device *rdev);
137 static void cik_init_cg(struct radeon_device *rdev);
138 static void cik_fini_pg(struct radeon_device *rdev);
139 static void cik_fini_cg(struct radeon_device *rdev);
140 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141                                           bool enable);
142
143 /* get temperature in millidegrees */
144 int ci_get_temp(struct radeon_device *rdev)
145 {
146         u32 temp;
147         int actual_temp = 0;
148
149         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
150                 CTF_TEMP_SHIFT;
151
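        /* bit 9 set appears to indicate a saturated reading; clamp to 255 C */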
152         if (temp & 0x200)
153                 actual_temp = 255;
154         else
155                 actual_temp = temp & 0x1ff;
156
157         actual_temp = actual_temp * 1000;
158
159         return actual_temp;
160 }
161
162 /* get temperature in millidegrees */
163 int kv_get_temp(struct radeon_device *rdev)
164 {
165         u32 temp;
166         int actual_temp = 0;
167
168         temp = RREG32_SMC(0xC0300E0C);
169
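        /* an SMC reading of 0 appears to mean no valid sample; otherwise the
         * temperature works out to (raw / 8) - 49 degrees C */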
170         if (temp)
171                 actual_temp = (temp / 8) - 49;
172         else
173                 actual_temp = 0;
174
175         actual_temp = actual_temp * 1000;
176
177         return actual_temp;
178 }
179
180 /*
181  * Indirect registers accessor
182  */
183 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
184 {
185         unsigned long flags;
186         u32 r;
187
188         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
189         WREG32(PCIE_INDEX, reg);
190         (void)RREG32(PCIE_INDEX);
191         r = RREG32(PCIE_DATA);
192         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
193         return r;
194 }
195
196 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197 {
198         unsigned long flags;
199
200         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
201         WREG32(PCIE_INDEX, reg);
202         (void)RREG32(PCIE_INDEX);
203         WREG32(PCIE_DATA, v);
204         (void)RREG32(PCIE_DATA);
205         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
206 }
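
/*
 * Elsewhere in the driver these accessors are typically reached through the
 * RREG32_PCIE_PORT()/WREG32_PCIE_PORT() wrappers, which dispatch through
 * rdev->pciep_rreg/pciep_wreg.  The index/data pattern above (write
 * PCIE_INDEX, read it back to flush the posted write, then access PCIE_DATA)
 * is serialized by pciep_idx_lock.
 */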
207
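/*
 * RLC save/restore lists: each entry below looks like a packed
 * (GRBM_GFX_INDEX selector << 16) | (register dword offset) word followed by
 * a placeholder data word, e.g. (0x0e00 << 16) | (0xc12c >> 2), 0x00000000.
 * The bare values such as 0x3 and 0x5 appear to separate sub-sections of the
 * list.  This is an interpretation of the table layout, not authoritative
 * documentation of the RLC format.
 */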
208 static const u32 spectre_rlc_save_restore_register_list[] =
209 {
210         (0x0e00 << 16) | (0xc12c >> 2),
211         0x00000000,
212         (0x0e00 << 16) | (0xc140 >> 2),
213         0x00000000,
214         (0x0e00 << 16) | (0xc150 >> 2),
215         0x00000000,
216         (0x0e00 << 16) | (0xc15c >> 2),
217         0x00000000,
218         (0x0e00 << 16) | (0xc168 >> 2),
219         0x00000000,
220         (0x0e00 << 16) | (0xc170 >> 2),
221         0x00000000,
222         (0x0e00 << 16) | (0xc178 >> 2),
223         0x00000000,
224         (0x0e00 << 16) | (0xc204 >> 2),
225         0x00000000,
226         (0x0e00 << 16) | (0xc2b4 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc2b8 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc2bc >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc2c0 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0x8228 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0x829c >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0x869c >> 2),
239         0x00000000,
240         (0x0600 << 16) | (0x98f4 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0x98f8 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0x9900 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc260 >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0x90e8 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0x3c000 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0x3c00c >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0x8c1c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0x9700 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xcd20 >> 2),
259         0x00000000,
260         (0x4e00 << 16) | (0xcd20 >> 2),
261         0x00000000,
262         (0x5e00 << 16) | (0xcd20 >> 2),
263         0x00000000,
264         (0x6e00 << 16) | (0xcd20 >> 2),
265         0x00000000,
266         (0x7e00 << 16) | (0xcd20 >> 2),
267         0x00000000,
268         (0x8e00 << 16) | (0xcd20 >> 2),
269         0x00000000,
270         (0x9e00 << 16) | (0xcd20 >> 2),
271         0x00000000,
272         (0xae00 << 16) | (0xcd20 >> 2),
273         0x00000000,
274         (0xbe00 << 16) | (0xcd20 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x89bc >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x8900 >> 2),
279         0x00000000,
280         0x3,
281         (0x0e00 << 16) | (0xc130 >> 2),
282         0x00000000,
283         (0x0e00 << 16) | (0xc134 >> 2),
284         0x00000000,
285         (0x0e00 << 16) | (0xc1fc >> 2),
286         0x00000000,
287         (0x0e00 << 16) | (0xc208 >> 2),
288         0x00000000,
289         (0x0e00 << 16) | (0xc264 >> 2),
290         0x00000000,
291         (0x0e00 << 16) | (0xc268 >> 2),
292         0x00000000,
293         (0x0e00 << 16) | (0xc26c >> 2),
294         0x00000000,
295         (0x0e00 << 16) | (0xc270 >> 2),
296         0x00000000,
297         (0x0e00 << 16) | (0xc274 >> 2),
298         0x00000000,
299         (0x0e00 << 16) | (0xc278 >> 2),
300         0x00000000,
301         (0x0e00 << 16) | (0xc27c >> 2),
302         0x00000000,
303         (0x0e00 << 16) | (0xc280 >> 2),
304         0x00000000,
305         (0x0e00 << 16) | (0xc284 >> 2),
306         0x00000000,
307         (0x0e00 << 16) | (0xc288 >> 2),
308         0x00000000,
309         (0x0e00 << 16) | (0xc28c >> 2),
310         0x00000000,
311         (0x0e00 << 16) | (0xc290 >> 2),
312         0x00000000,
313         (0x0e00 << 16) | (0xc294 >> 2),
314         0x00000000,
315         (0x0e00 << 16) | (0xc298 >> 2),
316         0x00000000,
317         (0x0e00 << 16) | (0xc29c >> 2),
318         0x00000000,
319         (0x0e00 << 16) | (0xc2a0 >> 2),
320         0x00000000,
321         (0x0e00 << 16) | (0xc2a4 >> 2),
322         0x00000000,
323         (0x0e00 << 16) | (0xc2a8 >> 2),
324         0x00000000,
325         (0x0e00 << 16) | (0xc2ac  >> 2),
326         0x00000000,
327         (0x0e00 << 16) | (0xc2b0 >> 2),
328         0x00000000,
329         (0x0e00 << 16) | (0x301d0 >> 2),
330         0x00000000,
331         (0x0e00 << 16) | (0x30238 >> 2),
332         0x00000000,
333         (0x0e00 << 16) | (0x30250 >> 2),
334         0x00000000,
335         (0x0e00 << 16) | (0x30254 >> 2),
336         0x00000000,
337         (0x0e00 << 16) | (0x30258 >> 2),
338         0x00000000,
339         (0x0e00 << 16) | (0x3025c >> 2),
340         0x00000000,
341         (0x4e00 << 16) | (0xc900 >> 2),
342         0x00000000,
343         (0x5e00 << 16) | (0xc900 >> 2),
344         0x00000000,
345         (0x6e00 << 16) | (0xc900 >> 2),
346         0x00000000,
347         (0x7e00 << 16) | (0xc900 >> 2),
348         0x00000000,
349         (0x8e00 << 16) | (0xc900 >> 2),
350         0x00000000,
351         (0x9e00 << 16) | (0xc900 >> 2),
352         0x00000000,
353         (0xae00 << 16) | (0xc900 >> 2),
354         0x00000000,
355         (0xbe00 << 16) | (0xc900 >> 2),
356         0x00000000,
357         (0x4e00 << 16) | (0xc904 >> 2),
358         0x00000000,
359         (0x5e00 << 16) | (0xc904 >> 2),
360         0x00000000,
361         (0x6e00 << 16) | (0xc904 >> 2),
362         0x00000000,
363         (0x7e00 << 16) | (0xc904 >> 2),
364         0x00000000,
365         (0x8e00 << 16) | (0xc904 >> 2),
366         0x00000000,
367         (0x9e00 << 16) | (0xc904 >> 2),
368         0x00000000,
369         (0xae00 << 16) | (0xc904 >> 2),
370         0x00000000,
371         (0xbe00 << 16) | (0xc904 >> 2),
372         0x00000000,
373         (0x4e00 << 16) | (0xc908 >> 2),
374         0x00000000,
375         (0x5e00 << 16) | (0xc908 >> 2),
376         0x00000000,
377         (0x6e00 << 16) | (0xc908 >> 2),
378         0x00000000,
379         (0x7e00 << 16) | (0xc908 >> 2),
380         0x00000000,
381         (0x8e00 << 16) | (0xc908 >> 2),
382         0x00000000,
383         (0x9e00 << 16) | (0xc908 >> 2),
384         0x00000000,
385         (0xae00 << 16) | (0xc908 >> 2),
386         0x00000000,
387         (0xbe00 << 16) | (0xc908 >> 2),
388         0x00000000,
389         (0x4e00 << 16) | (0xc90c >> 2),
390         0x00000000,
391         (0x5e00 << 16) | (0xc90c >> 2),
392         0x00000000,
393         (0x6e00 << 16) | (0xc90c >> 2),
394         0x00000000,
395         (0x7e00 << 16) | (0xc90c >> 2),
396         0x00000000,
397         (0x8e00 << 16) | (0xc90c >> 2),
398         0x00000000,
399         (0x9e00 << 16) | (0xc90c >> 2),
400         0x00000000,
401         (0xae00 << 16) | (0xc90c >> 2),
402         0x00000000,
403         (0xbe00 << 16) | (0xc90c >> 2),
404         0x00000000,
405         (0x4e00 << 16) | (0xc910 >> 2),
406         0x00000000,
407         (0x5e00 << 16) | (0xc910 >> 2),
408         0x00000000,
409         (0x6e00 << 16) | (0xc910 >> 2),
410         0x00000000,
411         (0x7e00 << 16) | (0xc910 >> 2),
412         0x00000000,
413         (0x8e00 << 16) | (0xc910 >> 2),
414         0x00000000,
415         (0x9e00 << 16) | (0xc910 >> 2),
416         0x00000000,
417         (0xae00 << 16) | (0xc910 >> 2),
418         0x00000000,
419         (0xbe00 << 16) | (0xc910 >> 2),
420         0x00000000,
421         (0x0e00 << 16) | (0xc99c >> 2),
422         0x00000000,
423         (0x0e00 << 16) | (0x9834 >> 2),
424         0x00000000,
425         (0x0000 << 16) | (0x30f00 >> 2),
426         0x00000000,
427         (0x0001 << 16) | (0x30f00 >> 2),
428         0x00000000,
429         (0x0000 << 16) | (0x30f04 >> 2),
430         0x00000000,
431         (0x0001 << 16) | (0x30f04 >> 2),
432         0x00000000,
433         (0x0000 << 16) | (0x30f08 >> 2),
434         0x00000000,
435         (0x0001 << 16) | (0x30f08 >> 2),
436         0x00000000,
437         (0x0000 << 16) | (0x30f0c >> 2),
438         0x00000000,
439         (0x0001 << 16) | (0x30f0c >> 2),
440         0x00000000,
441         (0x0600 << 16) | (0x9b7c >> 2),
442         0x00000000,
443         (0x0e00 << 16) | (0x8a14 >> 2),
444         0x00000000,
445         (0x0e00 << 16) | (0x8a18 >> 2),
446         0x00000000,
447         (0x0600 << 16) | (0x30a00 >> 2),
448         0x00000000,
449         (0x0e00 << 16) | (0x8bf0 >> 2),
450         0x00000000,
451         (0x0e00 << 16) | (0x8bcc >> 2),
452         0x00000000,
453         (0x0e00 << 16) | (0x8b24 >> 2),
454         0x00000000,
455         (0x0e00 << 16) | (0x30a04 >> 2),
456         0x00000000,
457         (0x0600 << 16) | (0x30a10 >> 2),
458         0x00000000,
459         (0x0600 << 16) | (0x30a14 >> 2),
460         0x00000000,
461         (0x0600 << 16) | (0x30a18 >> 2),
462         0x00000000,
463         (0x0600 << 16) | (0x30a2c >> 2),
464         0x00000000,
465         (0x0e00 << 16) | (0xc700 >> 2),
466         0x00000000,
467         (0x0e00 << 16) | (0xc704 >> 2),
468         0x00000000,
469         (0x0e00 << 16) | (0xc708 >> 2),
470         0x00000000,
471         (0x0e00 << 16) | (0xc768 >> 2),
472         0x00000000,
473         (0x0400 << 16) | (0xc770 >> 2),
474         0x00000000,
475         (0x0400 << 16) | (0xc774 >> 2),
476         0x00000000,
477         (0x0400 << 16) | (0xc778 >> 2),
478         0x00000000,
479         (0x0400 << 16) | (0xc77c >> 2),
480         0x00000000,
481         (0x0400 << 16) | (0xc780 >> 2),
482         0x00000000,
483         (0x0400 << 16) | (0xc784 >> 2),
484         0x00000000,
485         (0x0400 << 16) | (0xc788 >> 2),
486         0x00000000,
487         (0x0400 << 16) | (0xc78c >> 2),
488         0x00000000,
489         (0x0400 << 16) | (0xc798 >> 2),
490         0x00000000,
491         (0x0400 << 16) | (0xc79c >> 2),
492         0x00000000,
493         (0x0400 << 16) | (0xc7a0 >> 2),
494         0x00000000,
495         (0x0400 << 16) | (0xc7a4 >> 2),
496         0x00000000,
497         (0x0400 << 16) | (0xc7a8 >> 2),
498         0x00000000,
499         (0x0400 << 16) | (0xc7ac >> 2),
500         0x00000000,
501         (0x0400 << 16) | (0xc7b0 >> 2),
502         0x00000000,
503         (0x0400 << 16) | (0xc7b4 >> 2),
504         0x00000000,
505         (0x0e00 << 16) | (0x9100 >> 2),
506         0x00000000,
507         (0x0e00 << 16) | (0x3c010 >> 2),
508         0x00000000,
509         (0x0e00 << 16) | (0x92a8 >> 2),
510         0x00000000,
511         (0x0e00 << 16) | (0x92ac >> 2),
512         0x00000000,
513         (0x0e00 << 16) | (0x92b4 >> 2),
514         0x00000000,
515         (0x0e00 << 16) | (0x92b8 >> 2),
516         0x00000000,
517         (0x0e00 << 16) | (0x92bc >> 2),
518         0x00000000,
519         (0x0e00 << 16) | (0x92c0 >> 2),
520         0x00000000,
521         (0x0e00 << 16) | (0x92c4 >> 2),
522         0x00000000,
523         (0x0e00 << 16) | (0x92c8 >> 2),
524         0x00000000,
525         (0x0e00 << 16) | (0x92cc >> 2),
526         0x00000000,
527         (0x0e00 << 16) | (0x92d0 >> 2),
528         0x00000000,
529         (0x0e00 << 16) | (0x8c00 >> 2),
530         0x00000000,
531         (0x0e00 << 16) | (0x8c04 >> 2),
532         0x00000000,
533         (0x0e00 << 16) | (0x8c20 >> 2),
534         0x00000000,
535         (0x0e00 << 16) | (0x8c38 >> 2),
536         0x00000000,
537         (0x0e00 << 16) | (0x8c3c >> 2),
538         0x00000000,
539         (0x0e00 << 16) | (0xae00 >> 2),
540         0x00000000,
541         (0x0e00 << 16) | (0x9604 >> 2),
542         0x00000000,
543         (0x0e00 << 16) | (0xac08 >> 2),
544         0x00000000,
545         (0x0e00 << 16) | (0xac0c >> 2),
546         0x00000000,
547         (0x0e00 << 16) | (0xac10 >> 2),
548         0x00000000,
549         (0x0e00 << 16) | (0xac14 >> 2),
550         0x00000000,
551         (0x0e00 << 16) | (0xac58 >> 2),
552         0x00000000,
553         (0x0e00 << 16) | (0xac68 >> 2),
554         0x00000000,
555         (0x0e00 << 16) | (0xac6c >> 2),
556         0x00000000,
557         (0x0e00 << 16) | (0xac70 >> 2),
558         0x00000000,
559         (0x0e00 << 16) | (0xac74 >> 2),
560         0x00000000,
561         (0x0e00 << 16) | (0xac78 >> 2),
562         0x00000000,
563         (0x0e00 << 16) | (0xac7c >> 2),
564         0x00000000,
565         (0x0e00 << 16) | (0xac80 >> 2),
566         0x00000000,
567         (0x0e00 << 16) | (0xac84 >> 2),
568         0x00000000,
569         (0x0e00 << 16) | (0xac88 >> 2),
570         0x00000000,
571         (0x0e00 << 16) | (0xac8c >> 2),
572         0x00000000,
573         (0x0e00 << 16) | (0x970c >> 2),
574         0x00000000,
575         (0x0e00 << 16) | (0x9714 >> 2),
576         0x00000000,
577         (0x0e00 << 16) | (0x9718 >> 2),
578         0x00000000,
579         (0x0e00 << 16) | (0x971c >> 2),
580         0x00000000,
581         (0x0e00 << 16) | (0x31068 >> 2),
582         0x00000000,
583         (0x4e00 << 16) | (0x31068 >> 2),
584         0x00000000,
585         (0x5e00 << 16) | (0x31068 >> 2),
586         0x00000000,
587         (0x6e00 << 16) | (0x31068 >> 2),
588         0x00000000,
589         (0x7e00 << 16) | (0x31068 >> 2),
590         0x00000000,
591         (0x8e00 << 16) | (0x31068 >> 2),
592         0x00000000,
593         (0x9e00 << 16) | (0x31068 >> 2),
594         0x00000000,
595         (0xae00 << 16) | (0x31068 >> 2),
596         0x00000000,
597         (0xbe00 << 16) | (0x31068 >> 2),
598         0x00000000,
599         (0x0e00 << 16) | (0xcd10 >> 2),
600         0x00000000,
601         (0x0e00 << 16) | (0xcd14 >> 2),
602         0x00000000,
603         (0x0e00 << 16) | (0x88b0 >> 2),
604         0x00000000,
605         (0x0e00 << 16) | (0x88b4 >> 2),
606         0x00000000,
607         (0x0e00 << 16) | (0x88b8 >> 2),
608         0x00000000,
609         (0x0e00 << 16) | (0x88bc >> 2),
610         0x00000000,
611         (0x0400 << 16) | (0x89c0 >> 2),
612         0x00000000,
613         (0x0e00 << 16) | (0x88c4 >> 2),
614         0x00000000,
615         (0x0e00 << 16) | (0x88c8 >> 2),
616         0x00000000,
617         (0x0e00 << 16) | (0x88d0 >> 2),
618         0x00000000,
619         (0x0e00 << 16) | (0x88d4 >> 2),
620         0x00000000,
621         (0x0e00 << 16) | (0x88d8 >> 2),
622         0x00000000,
623         (0x0e00 << 16) | (0x8980 >> 2),
624         0x00000000,
625         (0x0e00 << 16) | (0x30938 >> 2),
626         0x00000000,
627         (0x0e00 << 16) | (0x3093c >> 2),
628         0x00000000,
629         (0x0e00 << 16) | (0x30940 >> 2),
630         0x00000000,
631         (0x0e00 << 16) | (0x89a0 >> 2),
632         0x00000000,
633         (0x0e00 << 16) | (0x30900 >> 2),
634         0x00000000,
635         (0x0e00 << 16) | (0x30904 >> 2),
636         0x00000000,
637         (0x0e00 << 16) | (0x89b4 >> 2),
638         0x00000000,
639         (0x0e00 << 16) | (0x3c210 >> 2),
640         0x00000000,
641         (0x0e00 << 16) | (0x3c214 >> 2),
642         0x00000000,
643         (0x0e00 << 16) | (0x3c218 >> 2),
644         0x00000000,
645         (0x0e00 << 16) | (0x8904 >> 2),
646         0x00000000,
647         0x5,
648         (0x0e00 << 16) | (0x8c28 >> 2),
649         (0x0e00 << 16) | (0x8c2c >> 2),
650         (0x0e00 << 16) | (0x8c30 >> 2),
651         (0x0e00 << 16) | (0x8c34 >> 2),
652         (0x0e00 << 16) | (0x9600 >> 2),
653 };
654
655 static const u32 kalindi_rlc_save_restore_register_list[] =
656 {
657         (0x0e00 << 16) | (0xc12c >> 2),
658         0x00000000,
659         (0x0e00 << 16) | (0xc140 >> 2),
660         0x00000000,
661         (0x0e00 << 16) | (0xc150 >> 2),
662         0x00000000,
663         (0x0e00 << 16) | (0xc15c >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc168 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc170 >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc204 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc2b4 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc2b8 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc2bc >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc2c0 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0x8228 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0x829c >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0x869c >> 2),
684         0x00000000,
685         (0x0600 << 16) | (0x98f4 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0x98f8 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0x9900 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc260 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0x90e8 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0x3c000 >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x3c00c >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x8c1c >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x9700 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0xcd20 >> 2),
704         0x00000000,
705         (0x4e00 << 16) | (0xcd20 >> 2),
706         0x00000000,
707         (0x5e00 << 16) | (0xcd20 >> 2),
708         0x00000000,
709         (0x6e00 << 16) | (0xcd20 >> 2),
710         0x00000000,
711         (0x7e00 << 16) | (0xcd20 >> 2),
712         0x00000000,
713         (0x0e00 << 16) | (0x89bc >> 2),
714         0x00000000,
715         (0x0e00 << 16) | (0x8900 >> 2),
716         0x00000000,
717         0x3,
718         (0x0e00 << 16) | (0xc130 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc134 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc1fc >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc208 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc264 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc268 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc26c >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc270 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc274 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc28c >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc290 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0xc294 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0xc298 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0xc2a0 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0xc2a4 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0xc2a8 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0xc2ac >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x301d0 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x30238 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x30250 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x30254 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x30258 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x3025c >> 2),
763         0x00000000,
764         (0x4e00 << 16) | (0xc900 >> 2),
765         0x00000000,
766         (0x5e00 << 16) | (0xc900 >> 2),
767         0x00000000,
768         (0x6e00 << 16) | (0xc900 >> 2),
769         0x00000000,
770         (0x7e00 << 16) | (0xc900 >> 2),
771         0x00000000,
772         (0x4e00 << 16) | (0xc904 >> 2),
773         0x00000000,
774         (0x5e00 << 16) | (0xc904 >> 2),
775         0x00000000,
776         (0x6e00 << 16) | (0xc904 >> 2),
777         0x00000000,
778         (0x7e00 << 16) | (0xc904 >> 2),
779         0x00000000,
780         (0x4e00 << 16) | (0xc908 >> 2),
781         0x00000000,
782         (0x5e00 << 16) | (0xc908 >> 2),
783         0x00000000,
784         (0x6e00 << 16) | (0xc908 >> 2),
785         0x00000000,
786         (0x7e00 << 16) | (0xc908 >> 2),
787         0x00000000,
788         (0x4e00 << 16) | (0xc90c >> 2),
789         0x00000000,
790         (0x5e00 << 16) | (0xc90c >> 2),
791         0x00000000,
792         (0x6e00 << 16) | (0xc90c >> 2),
793         0x00000000,
794         (0x7e00 << 16) | (0xc90c >> 2),
795         0x00000000,
796         (0x4e00 << 16) | (0xc910 >> 2),
797         0x00000000,
798         (0x5e00 << 16) | (0xc910 >> 2),
799         0x00000000,
800         (0x6e00 << 16) | (0xc910 >> 2),
801         0x00000000,
802         (0x7e00 << 16) | (0xc910 >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0xc99c >> 2),
805         0x00000000,
806         (0x0e00 << 16) | (0x9834 >> 2),
807         0x00000000,
808         (0x0000 << 16) | (0x30f00 >> 2),
809         0x00000000,
810         (0x0000 << 16) | (0x30f04 >> 2),
811         0x00000000,
812         (0x0000 << 16) | (0x30f08 >> 2),
813         0x00000000,
814         (0x0000 << 16) | (0x30f0c >> 2),
815         0x00000000,
816         (0x0600 << 16) | (0x9b7c >> 2),
817         0x00000000,
818         (0x0e00 << 16) | (0x8a14 >> 2),
819         0x00000000,
820         (0x0e00 << 16) | (0x8a18 >> 2),
821         0x00000000,
822         (0x0600 << 16) | (0x30a00 >> 2),
823         0x00000000,
824         (0x0e00 << 16) | (0x8bf0 >> 2),
825         0x00000000,
826         (0x0e00 << 16) | (0x8bcc >> 2),
827         0x00000000,
828         (0x0e00 << 16) | (0x8b24 >> 2),
829         0x00000000,
830         (0x0e00 << 16) | (0x30a04 >> 2),
831         0x00000000,
832         (0x0600 << 16) | (0x30a10 >> 2),
833         0x00000000,
834         (0x0600 << 16) | (0x30a14 >> 2),
835         0x00000000,
836         (0x0600 << 16) | (0x30a18 >> 2),
837         0x00000000,
838         (0x0600 << 16) | (0x30a2c >> 2),
839         0x00000000,
840         (0x0e00 << 16) | (0xc700 >> 2),
841         0x00000000,
842         (0x0e00 << 16) | (0xc704 >> 2),
843         0x00000000,
844         (0x0e00 << 16) | (0xc708 >> 2),
845         0x00000000,
846         (0x0e00 << 16) | (0xc768 >> 2),
847         0x00000000,
848         (0x0400 << 16) | (0xc770 >> 2),
849         0x00000000,
850         (0x0400 << 16) | (0xc774 >> 2),
851         0x00000000,
852         (0x0400 << 16) | (0xc798 >> 2),
853         0x00000000,
854         (0x0400 << 16) | (0xc79c >> 2),
855         0x00000000,
856         (0x0e00 << 16) | (0x9100 >> 2),
857         0x00000000,
858         (0x0e00 << 16) | (0x3c010 >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x8c00 >> 2),
861         0x00000000,
862         (0x0e00 << 16) | (0x8c04 >> 2),
863         0x00000000,
864         (0x0e00 << 16) | (0x8c20 >> 2),
865         0x00000000,
866         (0x0e00 << 16) | (0x8c38 >> 2),
867         0x00000000,
868         (0x0e00 << 16) | (0x8c3c >> 2),
869         0x00000000,
870         (0x0e00 << 16) | (0xae00 >> 2),
871         0x00000000,
872         (0x0e00 << 16) | (0x9604 >> 2),
873         0x00000000,
874         (0x0e00 << 16) | (0xac08 >> 2),
875         0x00000000,
876         (0x0e00 << 16) | (0xac0c >> 2),
877         0x00000000,
878         (0x0e00 << 16) | (0xac10 >> 2),
879         0x00000000,
880         (0x0e00 << 16) | (0xac14 >> 2),
881         0x00000000,
882         (0x0e00 << 16) | (0xac58 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0xac68 >> 2),
885         0x00000000,
886         (0x0e00 << 16) | (0xac6c >> 2),
887         0x00000000,
888         (0x0e00 << 16) | (0xac70 >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0xac74 >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0xac78 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0xac7c >> 2),
895         0x00000000,
896         (0x0e00 << 16) | (0xac80 >> 2),
897         0x00000000,
898         (0x0e00 << 16) | (0xac84 >> 2),
899         0x00000000,
900         (0x0e00 << 16) | (0xac88 >> 2),
901         0x00000000,
902         (0x0e00 << 16) | (0xac8c >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0x970c >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0x9714 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0x9718 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0x971c >> 2),
911         0x00000000,
912         (0x0e00 << 16) | (0x31068 >> 2),
913         0x00000000,
914         (0x4e00 << 16) | (0x31068 >> 2),
915         0x00000000,
916         (0x5e00 << 16) | (0x31068 >> 2),
917         0x00000000,
918         (0x6e00 << 16) | (0x31068 >> 2),
919         0x00000000,
920         (0x7e00 << 16) | (0x31068 >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0xcd10 >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0xcd14 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0x88b0 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0x88b4 >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0x88b8 >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0x88bc >> 2),
933         0x00000000,
934         (0x0400 << 16) | (0x89c0 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0x88c4 >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0x88c8 >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0x88d0 >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0x88d4 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0x88d8 >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0x8980 >> 2),
947         0x00000000,
948         (0x0e00 << 16) | (0x30938 >> 2),
949         0x00000000,
950         (0x0e00 << 16) | (0x3093c >> 2),
951         0x00000000,
952         (0x0e00 << 16) | (0x30940 >> 2),
953         0x00000000,
954         (0x0e00 << 16) | (0x89a0 >> 2),
955         0x00000000,
956         (0x0e00 << 16) | (0x30900 >> 2),
957         0x00000000,
958         (0x0e00 << 16) | (0x30904 >> 2),
959         0x00000000,
960         (0x0e00 << 16) | (0x89b4 >> 2),
961         0x00000000,
962         (0x0e00 << 16) | (0x3e1fc >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0x3c210 >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0x3c214 >> 2),
967         0x00000000,
968         (0x0e00 << 16) | (0x3c218 >> 2),
969         0x00000000,
970         (0x0e00 << 16) | (0x8904 >> 2),
971         0x00000000,
972         0x5,
973         (0x0e00 << 16) | (0x8c28 >> 2),
974         (0x0e00 << 16) | (0x8c2c >> 2),
975         (0x0e00 << 16) | (0x8c30 >> 2),
976         (0x0e00 << 16) | (0x8c34 >> 2),
977         (0x0e00 << 16) | (0x9600 >> 2),
978 };
979
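/*
 * The "golden" tables below are consumed by radeon_program_register_sequence()
 * (see cik_init_golden_registers() further down) as {offset, mask, value}
 * triplets: effectively the masked bits of the register are cleared and the
 * value is OR'd in, with an all-ones mask simply overwriting the register.
 * Which tables get applied is selected by rdev->family.
 */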
980 static const u32 bonaire_golden_spm_registers[] =
981 {
982         0x30800, 0xe0ffffff, 0xe0000000
983 };
984
985 static const u32 bonaire_golden_common_registers[] =
986 {
987         0xc770, 0xffffffff, 0x00000800,
988         0xc774, 0xffffffff, 0x00000800,
989         0xc798, 0xffffffff, 0x00007fbf,
990         0xc79c, 0xffffffff, 0x00007faf
991 };
992
993 static const u32 bonaire_golden_registers[] =
994 {
995         0x3354, 0x00000333, 0x00000333,
996         0x3350, 0x000c0fc0, 0x00040200,
997         0x9a10, 0x00010000, 0x00058208,
998         0x3c000, 0xffff1fff, 0x00140000,
999         0x3c200, 0xfdfc0fff, 0x00000100,
1000         0x3c234, 0x40000000, 0x40000200,
1001         0x9830, 0xffffffff, 0x00000000,
1002         0x9834, 0xf00fffff, 0x00000400,
1003         0x9838, 0x0002021c, 0x00020200,
1004         0xc78, 0x00000080, 0x00000000,
1005         0x5bb0, 0x000000f0, 0x00000070,
1006         0x5bc0, 0xf0311fff, 0x80300000,
1007         0x98f8, 0x73773777, 0x12010001,
1008         0x350c, 0x00810000, 0x408af000,
1009         0x7030, 0x31000111, 0x00000011,
1010         0x2f48, 0x73773777, 0x12010001,
1011         0x220c, 0x00007fb6, 0x0021a1b1,
1012         0x2210, 0x00007fb6, 0x002021b1,
1013         0x2180, 0x00007fb6, 0x00002191,
1014         0x2218, 0x00007fb6, 0x002121b1,
1015         0x221c, 0x00007fb6, 0x002021b1,
1016         0x21dc, 0x00007fb6, 0x00002191,
1017         0x21e0, 0x00007fb6, 0x00002191,
1018         0x3628, 0x0000003f, 0x0000000a,
1019         0x362c, 0x0000003f, 0x0000000a,
1020         0x2ae4, 0x00073ffe, 0x000022a2,
1021         0x240c, 0x000007ff, 0x00000000,
1022         0x8a14, 0xf000003f, 0x00000007,
1023         0x8bf0, 0x00002001, 0x00000001,
1024         0x8b24, 0xffffffff, 0x00ffffff,
1025         0x30a04, 0x0000ff0f, 0x00000000,
1026         0x28a4c, 0x07ffffff, 0x06000000,
1027         0x4d8, 0x00000fff, 0x00000100,
1028         0x3e78, 0x00000001, 0x00000002,
1029         0x9100, 0x03000000, 0x0362c688,
1030         0x8c00, 0x000000ff, 0x00000001,
1031         0xe40, 0x00001fff, 0x00001fff,
1032         0x9060, 0x0000007f, 0x00000020,
1033         0x9508, 0x00010000, 0x00010000,
1034         0xac14, 0x000003ff, 0x000000f3,
1035         0xac0c, 0xffffffff, 0x00001032
1036 };
1037
1038 static const u32 bonaire_mgcg_cgcg_init[] =
1039 {
1040         0xc420, 0xffffffff, 0xfffffffc,
1041         0x30800, 0xffffffff, 0xe0000000,
1042         0x3c2a0, 0xffffffff, 0x00000100,
1043         0x3c208, 0xffffffff, 0x00000100,
1044         0x3c2c0, 0xffffffff, 0xc0000100,
1045         0x3c2c8, 0xffffffff, 0xc0000100,
1046         0x3c2c4, 0xffffffff, 0xc0000100,
1047         0x55e4, 0xffffffff, 0x00600100,
1048         0x3c280, 0xffffffff, 0x00000100,
1049         0x3c214, 0xffffffff, 0x06000100,
1050         0x3c220, 0xffffffff, 0x00000100,
1051         0x3c218, 0xffffffff, 0x06000100,
1052         0x3c204, 0xffffffff, 0x00000100,
1053         0x3c2e0, 0xffffffff, 0x00000100,
1054         0x3c224, 0xffffffff, 0x00000100,
1055         0x3c200, 0xffffffff, 0x00000100,
1056         0x3c230, 0xffffffff, 0x00000100,
1057         0x3c234, 0xffffffff, 0x00000100,
1058         0x3c250, 0xffffffff, 0x00000100,
1059         0x3c254, 0xffffffff, 0x00000100,
1060         0x3c258, 0xffffffff, 0x00000100,
1061         0x3c25c, 0xffffffff, 0x00000100,
1062         0x3c260, 0xffffffff, 0x00000100,
1063         0x3c27c, 0xffffffff, 0x00000100,
1064         0x3c278, 0xffffffff, 0x00000100,
1065         0x3c210, 0xffffffff, 0x06000100,
1066         0x3c290, 0xffffffff, 0x00000100,
1067         0x3c274, 0xffffffff, 0x00000100,
1068         0x3c2b4, 0xffffffff, 0x00000100,
1069         0x3c2b0, 0xffffffff, 0x00000100,
1070         0x3c270, 0xffffffff, 0x00000100,
1071         0x30800, 0xffffffff, 0xe0000000,
1072         0x3c020, 0xffffffff, 0x00010000,
1073         0x3c024, 0xffffffff, 0x00030002,
1074         0x3c028, 0xffffffff, 0x00040007,
1075         0x3c02c, 0xffffffff, 0x00060005,
1076         0x3c030, 0xffffffff, 0x00090008,
1077         0x3c034, 0xffffffff, 0x00010000,
1078         0x3c038, 0xffffffff, 0x00030002,
1079         0x3c03c, 0xffffffff, 0x00040007,
1080         0x3c040, 0xffffffff, 0x00060005,
1081         0x3c044, 0xffffffff, 0x00090008,
1082         0x3c048, 0xffffffff, 0x00010000,
1083         0x3c04c, 0xffffffff, 0x00030002,
1084         0x3c050, 0xffffffff, 0x00040007,
1085         0x3c054, 0xffffffff, 0x00060005,
1086         0x3c058, 0xffffffff, 0x00090008,
1087         0x3c05c, 0xffffffff, 0x00010000,
1088         0x3c060, 0xffffffff, 0x00030002,
1089         0x3c064, 0xffffffff, 0x00040007,
1090         0x3c068, 0xffffffff, 0x00060005,
1091         0x3c06c, 0xffffffff, 0x00090008,
1092         0x3c070, 0xffffffff, 0x00010000,
1093         0x3c074, 0xffffffff, 0x00030002,
1094         0x3c078, 0xffffffff, 0x00040007,
1095         0x3c07c, 0xffffffff, 0x00060005,
1096         0x3c080, 0xffffffff, 0x00090008,
1097         0x3c084, 0xffffffff, 0x00010000,
1098         0x3c088, 0xffffffff, 0x00030002,
1099         0x3c08c, 0xffffffff, 0x00040007,
1100         0x3c090, 0xffffffff, 0x00060005,
1101         0x3c094, 0xffffffff, 0x00090008,
1102         0x3c098, 0xffffffff, 0x00010000,
1103         0x3c09c, 0xffffffff, 0x00030002,
1104         0x3c0a0, 0xffffffff, 0x00040007,
1105         0x3c0a4, 0xffffffff, 0x00060005,
1106         0x3c0a8, 0xffffffff, 0x00090008,
1107         0x3c000, 0xffffffff, 0x96e00200,
1108         0x8708, 0xffffffff, 0x00900100,
1109         0xc424, 0xffffffff, 0x0020003f,
1110         0x38, 0xffffffff, 0x0140001c,
1111         0x3c, 0x000f0000, 0x000f0000,
1112         0x220, 0xffffffff, 0xC060000C,
1113         0x224, 0xc0000fff, 0x00000100,
1114         0xf90, 0xffffffff, 0x00000100,
1115         0xf98, 0x00000101, 0x00000000,
1116         0x20a8, 0xffffffff, 0x00000104,
1117         0x55e4, 0xff000fff, 0x00000100,
1118         0x30cc, 0xc0000fff, 0x00000104,
1119         0xc1e4, 0x00000001, 0x00000001,
1120         0xd00c, 0xff000ff0, 0x00000100,
1121         0xd80c, 0xff000ff0, 0x00000100
1122 };
1123
1124 static const u32 spectre_golden_spm_registers[] =
1125 {
1126         0x30800, 0xe0ffffff, 0xe0000000
1127 };
1128
1129 static const u32 spectre_golden_common_registers[] =
1130 {
1131         0xc770, 0xffffffff, 0x00000800,
1132         0xc774, 0xffffffff, 0x00000800,
1133         0xc798, 0xffffffff, 0x00007fbf,
1134         0xc79c, 0xffffffff, 0x00007faf
1135 };
1136
1137 static const u32 spectre_golden_registers[] =
1138 {
1139         0x3c000, 0xffff1fff, 0x96940200,
1140         0x3c00c, 0xffff0001, 0xff000000,
1141         0x3c200, 0xfffc0fff, 0x00000100,
1142         0x6ed8, 0x00010101, 0x00010000,
1143         0x9834, 0xf00fffff, 0x00000400,
1144         0x9838, 0xfffffffc, 0x00020200,
1145         0x5bb0, 0x000000f0, 0x00000070,
1146         0x5bc0, 0xf0311fff, 0x80300000,
1147         0x98f8, 0x73773777, 0x12010001,
1148         0x9b7c, 0x00ff0000, 0x00fc0000,
1149         0x2f48, 0x73773777, 0x12010001,
1150         0x8a14, 0xf000003f, 0x00000007,
1151         0x8b24, 0xffffffff, 0x00ffffff,
1152         0x28350, 0x3f3f3fff, 0x00000082,
1153         0x28354, 0x0000003f, 0x00000000,
1154         0x3e78, 0x00000001, 0x00000002,
1155         0x913c, 0xffff03df, 0x00000004,
1156         0xc768, 0x00000008, 0x00000008,
1157         0x8c00, 0x000008ff, 0x00000800,
1158         0x9508, 0x00010000, 0x00010000,
1159         0xac0c, 0xffffffff, 0x54763210,
1160         0x214f8, 0x01ff01ff, 0x00000002,
1161         0x21498, 0x007ff800, 0x00200000,
1162         0x2015c, 0xffffffff, 0x00000f40,
1163         0x30934, 0xffffffff, 0x00000001
1164 };
1165
1166 static const u32 spectre_mgcg_cgcg_init[] =
1167 {
1168         0xc420, 0xffffffff, 0xfffffffc,
1169         0x30800, 0xffffffff, 0xe0000000,
1170         0x3c2a0, 0xffffffff, 0x00000100,
1171         0x3c208, 0xffffffff, 0x00000100,
1172         0x3c2c0, 0xffffffff, 0x00000100,
1173         0x3c2c8, 0xffffffff, 0x00000100,
1174         0x3c2c4, 0xffffffff, 0x00000100,
1175         0x55e4, 0xffffffff, 0x00600100,
1176         0x3c280, 0xffffffff, 0x00000100,
1177         0x3c214, 0xffffffff, 0x06000100,
1178         0x3c220, 0xffffffff, 0x00000100,
1179         0x3c218, 0xffffffff, 0x06000100,
1180         0x3c204, 0xffffffff, 0x00000100,
1181         0x3c2e0, 0xffffffff, 0x00000100,
1182         0x3c224, 0xffffffff, 0x00000100,
1183         0x3c200, 0xffffffff, 0x00000100,
1184         0x3c230, 0xffffffff, 0x00000100,
1185         0x3c234, 0xffffffff, 0x00000100,
1186         0x3c250, 0xffffffff, 0x00000100,
1187         0x3c254, 0xffffffff, 0x00000100,
1188         0x3c258, 0xffffffff, 0x00000100,
1189         0x3c25c, 0xffffffff, 0x00000100,
1190         0x3c260, 0xffffffff, 0x00000100,
1191         0x3c27c, 0xffffffff, 0x00000100,
1192         0x3c278, 0xffffffff, 0x00000100,
1193         0x3c210, 0xffffffff, 0x06000100,
1194         0x3c290, 0xffffffff, 0x00000100,
1195         0x3c274, 0xffffffff, 0x00000100,
1196         0x3c2b4, 0xffffffff, 0x00000100,
1197         0x3c2b0, 0xffffffff, 0x00000100,
1198         0x3c270, 0xffffffff, 0x00000100,
1199         0x30800, 0xffffffff, 0xe0000000,
1200         0x3c020, 0xffffffff, 0x00010000,
1201         0x3c024, 0xffffffff, 0x00030002,
1202         0x3c028, 0xffffffff, 0x00040007,
1203         0x3c02c, 0xffffffff, 0x00060005,
1204         0x3c030, 0xffffffff, 0x00090008,
1205         0x3c034, 0xffffffff, 0x00010000,
1206         0x3c038, 0xffffffff, 0x00030002,
1207         0x3c03c, 0xffffffff, 0x00040007,
1208         0x3c040, 0xffffffff, 0x00060005,
1209         0x3c044, 0xffffffff, 0x00090008,
1210         0x3c048, 0xffffffff, 0x00010000,
1211         0x3c04c, 0xffffffff, 0x00030002,
1212         0x3c050, 0xffffffff, 0x00040007,
1213         0x3c054, 0xffffffff, 0x00060005,
1214         0x3c058, 0xffffffff, 0x00090008,
1215         0x3c05c, 0xffffffff, 0x00010000,
1216         0x3c060, 0xffffffff, 0x00030002,
1217         0x3c064, 0xffffffff, 0x00040007,
1218         0x3c068, 0xffffffff, 0x00060005,
1219         0x3c06c, 0xffffffff, 0x00090008,
1220         0x3c070, 0xffffffff, 0x00010000,
1221         0x3c074, 0xffffffff, 0x00030002,
1222         0x3c078, 0xffffffff, 0x00040007,
1223         0x3c07c, 0xffffffff, 0x00060005,
1224         0x3c080, 0xffffffff, 0x00090008,
1225         0x3c084, 0xffffffff, 0x00010000,
1226         0x3c088, 0xffffffff, 0x00030002,
1227         0x3c08c, 0xffffffff, 0x00040007,
1228         0x3c090, 0xffffffff, 0x00060005,
1229         0x3c094, 0xffffffff, 0x00090008,
1230         0x3c098, 0xffffffff, 0x00010000,
1231         0x3c09c, 0xffffffff, 0x00030002,
1232         0x3c0a0, 0xffffffff, 0x00040007,
1233         0x3c0a4, 0xffffffff, 0x00060005,
1234         0x3c0a8, 0xffffffff, 0x00090008,
1235         0x3c0ac, 0xffffffff, 0x00010000,
1236         0x3c0b0, 0xffffffff, 0x00030002,
1237         0x3c0b4, 0xffffffff, 0x00040007,
1238         0x3c0b8, 0xffffffff, 0x00060005,
1239         0x3c0bc, 0xffffffff, 0x00090008,
1240         0x3c000, 0xffffffff, 0x96e00200,
1241         0x8708, 0xffffffff, 0x00900100,
1242         0xc424, 0xffffffff, 0x0020003f,
1243         0x38, 0xffffffff, 0x0140001c,
1244         0x3c, 0x000f0000, 0x000f0000,
1245         0x220, 0xffffffff, 0xC060000C,
1246         0x224, 0xc0000fff, 0x00000100,
1247         0xf90, 0xffffffff, 0x00000100,
1248         0xf98, 0x00000101, 0x00000000,
1249         0x20a8, 0xffffffff, 0x00000104,
1250         0x55e4, 0xff000fff, 0x00000100,
1251         0x30cc, 0xc0000fff, 0x00000104,
1252         0xc1e4, 0x00000001, 0x00000001,
1253         0xd00c, 0xff000ff0, 0x00000100,
1254         0xd80c, 0xff000ff0, 0x00000100
1255 };
1256
1257 static const u32 kalindi_golden_spm_registers[] =
1258 {
1259         0x30800, 0xe0ffffff, 0xe0000000
1260 };
1261
1262 static const u32 kalindi_golden_common_registers[] =
1263 {
1264         0xc770, 0xffffffff, 0x00000800,
1265         0xc774, 0xffffffff, 0x00000800,
1266         0xc798, 0xffffffff, 0x00007fbf,
1267         0xc79c, 0xffffffff, 0x00007faf
1268 };
1269
1270 static const u32 kalindi_golden_registers[] =
1271 {
1272         0x3c000, 0xffffdfff, 0x6e944040,
1273         0x55e4, 0xff607fff, 0xfc000100,
1274         0x3c220, 0xff000fff, 0x00000100,
1275         0x3c224, 0xff000fff, 0x00000100,
1276         0x3c200, 0xfffc0fff, 0x00000100,
1277         0x6ed8, 0x00010101, 0x00010000,
1278         0x9830, 0xffffffff, 0x00000000,
1279         0x9834, 0xf00fffff, 0x00000400,
1280         0x5bb0, 0x000000f0, 0x00000070,
1281         0x5bc0, 0xf0311fff, 0x80300000,
1282         0x98f8, 0x73773777, 0x12010001,
1283         0x98fc, 0xffffffff, 0x00000010,
1284         0x9b7c, 0x00ff0000, 0x00fc0000,
1285         0x8030, 0x00001f0f, 0x0000100a,
1286         0x2f48, 0x73773777, 0x12010001,
1287         0x2408, 0x000fffff, 0x000c007f,
1288         0x8a14, 0xf000003f, 0x00000007,
1289         0x8b24, 0x3fff3fff, 0x00ffcfff,
1290         0x30a04, 0x0000ff0f, 0x00000000,
1291         0x28a4c, 0x07ffffff, 0x06000000,
1292         0x4d8, 0x00000fff, 0x00000100,
1293         0x3e78, 0x00000001, 0x00000002,
1294         0xc768, 0x00000008, 0x00000008,
1295         0x8c00, 0x000000ff, 0x00000003,
1296         0x214f8, 0x01ff01ff, 0x00000002,
1297         0x21498, 0x007ff800, 0x00200000,
1298         0x2015c, 0xffffffff, 0x00000f40,
1299         0x88c4, 0x001f3ae3, 0x00000082,
1300         0x88d4, 0x0000001f, 0x00000010,
1301         0x30934, 0xffffffff, 0x00000000
1302 };
1303
1304 static const u32 kalindi_mgcg_cgcg_init[] =
1305 {
1306         0xc420, 0xffffffff, 0xfffffffc,
1307         0x30800, 0xffffffff, 0xe0000000,
1308         0x3c2a0, 0xffffffff, 0x00000100,
1309         0x3c208, 0xffffffff, 0x00000100,
1310         0x3c2c0, 0xffffffff, 0x00000100,
1311         0x3c2c8, 0xffffffff, 0x00000100,
1312         0x3c2c4, 0xffffffff, 0x00000100,
1313         0x55e4, 0xffffffff, 0x00600100,
1314         0x3c280, 0xffffffff, 0x00000100,
1315         0x3c214, 0xffffffff, 0x06000100,
1316         0x3c220, 0xffffffff, 0x00000100,
1317         0x3c218, 0xffffffff, 0x06000100,
1318         0x3c204, 0xffffffff, 0x00000100,
1319         0x3c2e0, 0xffffffff, 0x00000100,
1320         0x3c224, 0xffffffff, 0x00000100,
1321         0x3c200, 0xffffffff, 0x00000100,
1322         0x3c230, 0xffffffff, 0x00000100,
1323         0x3c234, 0xffffffff, 0x00000100,
1324         0x3c250, 0xffffffff, 0x00000100,
1325         0x3c254, 0xffffffff, 0x00000100,
1326         0x3c258, 0xffffffff, 0x00000100,
1327         0x3c25c, 0xffffffff, 0x00000100,
1328         0x3c260, 0xffffffff, 0x00000100,
1329         0x3c27c, 0xffffffff, 0x00000100,
1330         0x3c278, 0xffffffff, 0x00000100,
1331         0x3c210, 0xffffffff, 0x06000100,
1332         0x3c290, 0xffffffff, 0x00000100,
1333         0x3c274, 0xffffffff, 0x00000100,
1334         0x3c2b4, 0xffffffff, 0x00000100,
1335         0x3c2b0, 0xffffffff, 0x00000100,
1336         0x3c270, 0xffffffff, 0x00000100,
1337         0x30800, 0xffffffff, 0xe0000000,
1338         0x3c020, 0xffffffff, 0x00010000,
1339         0x3c024, 0xffffffff, 0x00030002,
1340         0x3c028, 0xffffffff, 0x00040007,
1341         0x3c02c, 0xffffffff, 0x00060005,
1342         0x3c030, 0xffffffff, 0x00090008,
1343         0x3c034, 0xffffffff, 0x00010000,
1344         0x3c038, 0xffffffff, 0x00030002,
1345         0x3c03c, 0xffffffff, 0x00040007,
1346         0x3c040, 0xffffffff, 0x00060005,
1347         0x3c044, 0xffffffff, 0x00090008,
1348         0x3c000, 0xffffffff, 0x96e00200,
1349         0x8708, 0xffffffff, 0x00900100,
1350         0xc424, 0xffffffff, 0x0020003f,
1351         0x38, 0xffffffff, 0x0140001c,
1352         0x3c, 0x000f0000, 0x000f0000,
1353         0x220, 0xffffffff, 0xC060000C,
1354         0x224, 0xc0000fff, 0x00000100,
1355         0x20a8, 0xffffffff, 0x00000104,
1356         0x55e4, 0xff000fff, 0x00000100,
1357         0x30cc, 0xc0000fff, 0x00000104,
1358         0xc1e4, 0x00000001, 0x00000001,
1359         0xd00c, 0xff000ff0, 0x00000100,
1360         0xd80c, 0xff000ff0, 0x00000100
1361 };
1362
1363 static const u32 hawaii_golden_spm_registers[] =
1364 {
1365         0x30800, 0xe0ffffff, 0xe0000000
1366 };
1367
1368 static const u32 hawaii_golden_common_registers[] =
1369 {
1370         0x30800, 0xffffffff, 0xe0000000,
1371         0x28350, 0xffffffff, 0x3a00161a,
1372         0x28354, 0xffffffff, 0x0000002e,
1373         0x9a10, 0xffffffff, 0x00018208,
1374         0x98f8, 0xffffffff, 0x12011003
1375 };
1376
1377 static const u32 hawaii_golden_registers[] =
1378 {
1379         0x3354, 0x00000333, 0x00000333,
1380         0x9a10, 0x00010000, 0x00058208,
1381         0x9830, 0xffffffff, 0x00000000,
1382         0x9834, 0xf00fffff, 0x00000400,
1383         0x9838, 0x0002021c, 0x00020200,
1384         0xc78, 0x00000080, 0x00000000,
1385         0x5bb0, 0x000000f0, 0x00000070,
1386         0x5bc0, 0xf0311fff, 0x80300000,
1387         0x350c, 0x00810000, 0x408af000,
1388         0x7030, 0x31000111, 0x00000011,
1389         0x2f48, 0x73773777, 0x12010001,
1390         0x2120, 0x0000007f, 0x0000001b,
1391         0x21dc, 0x00007fb6, 0x00002191,
1392         0x3628, 0x0000003f, 0x0000000a,
1393         0x362c, 0x0000003f, 0x0000000a,
1394         0x2ae4, 0x00073ffe, 0x000022a2,
1395         0x240c, 0x000007ff, 0x00000000,
1396         0x8bf0, 0x00002001, 0x00000001,
1397         0x8b24, 0xffffffff, 0x00ffffff,
1398         0x30a04, 0x0000ff0f, 0x00000000,
1399         0x28a4c, 0x07ffffff, 0x06000000,
1400         0x3e78, 0x00000001, 0x00000002,
1401         0xc768, 0x00000008, 0x00000008,
1402         0xc770, 0x00000f00, 0x00000800,
1403         0xc774, 0x00000f00, 0x00000800,
1404         0xc798, 0x00ffffff, 0x00ff7fbf,
1405         0xc79c, 0x00ffffff, 0x00ff7faf,
1406         0x8c00, 0x000000ff, 0x00000800,
1407         0xe40, 0x00001fff, 0x00001fff,
1408         0x9060, 0x0000007f, 0x00000020,
1409         0x9508, 0x00010000, 0x00010000,
1410         0xae00, 0x00100000, 0x000ff07c,
1411         0xac14, 0x000003ff, 0x0000000f,
1412         0xac10, 0xffffffff, 0x7564fdec,
1413         0xac0c, 0xffffffff, 0x3120b9a8,
1414         0xac08, 0x20000000, 0x0f9c0000
1415 };
1416
1417 static const u32 hawaii_mgcg_cgcg_init[] =
1418 {
1419         0xc420, 0xffffffff, 0xfffffffd,
1420         0x30800, 0xffffffff, 0xe0000000,
1421         0x3c2a0, 0xffffffff, 0x00000100,
1422         0x3c208, 0xffffffff, 0x00000100,
1423         0x3c2c0, 0xffffffff, 0x00000100,
1424         0x3c2c8, 0xffffffff, 0x00000100,
1425         0x3c2c4, 0xffffffff, 0x00000100,
1426         0x55e4, 0xffffffff, 0x00200100,
1427         0x3c280, 0xffffffff, 0x00000100,
1428         0x3c214, 0xffffffff, 0x06000100,
1429         0x3c220, 0xffffffff, 0x00000100,
1430         0x3c218, 0xffffffff, 0x06000100,
1431         0x3c204, 0xffffffff, 0x00000100,
1432         0x3c2e0, 0xffffffff, 0x00000100,
1433         0x3c224, 0xffffffff, 0x00000100,
1434         0x3c200, 0xffffffff, 0x00000100,
1435         0x3c230, 0xffffffff, 0x00000100,
1436         0x3c234, 0xffffffff, 0x00000100,
1437         0x3c250, 0xffffffff, 0x00000100,
1438         0x3c254, 0xffffffff, 0x00000100,
1439         0x3c258, 0xffffffff, 0x00000100,
1440         0x3c25c, 0xffffffff, 0x00000100,
1441         0x3c260, 0xffffffff, 0x00000100,
1442         0x3c27c, 0xffffffff, 0x00000100,
1443         0x3c278, 0xffffffff, 0x00000100,
1444         0x3c210, 0xffffffff, 0x06000100,
1445         0x3c290, 0xffffffff, 0x00000100,
1446         0x3c274, 0xffffffff, 0x00000100,
1447         0x3c2b4, 0xffffffff, 0x00000100,
1448         0x3c2b0, 0xffffffff, 0x00000100,
1449         0x3c270, 0xffffffff, 0x00000100,
1450         0x30800, 0xffffffff, 0xe0000000,
1451         0x3c020, 0xffffffff, 0x00010000,
1452         0x3c024, 0xffffffff, 0x00030002,
1453         0x3c028, 0xffffffff, 0x00040007,
1454         0x3c02c, 0xffffffff, 0x00060005,
1455         0x3c030, 0xffffffff, 0x00090008,
1456         0x3c034, 0xffffffff, 0x00010000,
1457         0x3c038, 0xffffffff, 0x00030002,
1458         0x3c03c, 0xffffffff, 0x00040007,
1459         0x3c040, 0xffffffff, 0x00060005,
1460         0x3c044, 0xffffffff, 0x00090008,
1461         0x3c048, 0xffffffff, 0x00010000,
1462         0x3c04c, 0xffffffff, 0x00030002,
1463         0x3c050, 0xffffffff, 0x00040007,
1464         0x3c054, 0xffffffff, 0x00060005,
1465         0x3c058, 0xffffffff, 0x00090008,
1466         0x3c05c, 0xffffffff, 0x00010000,
1467         0x3c060, 0xffffffff, 0x00030002,
1468         0x3c064, 0xffffffff, 0x00040007,
1469         0x3c068, 0xffffffff, 0x00060005,
1470         0x3c06c, 0xffffffff, 0x00090008,
1471         0x3c070, 0xffffffff, 0x00010000,
1472         0x3c074, 0xffffffff, 0x00030002,
1473         0x3c078, 0xffffffff, 0x00040007,
1474         0x3c07c, 0xffffffff, 0x00060005,
1475         0x3c080, 0xffffffff, 0x00090008,
1476         0x3c084, 0xffffffff, 0x00010000,
1477         0x3c088, 0xffffffff, 0x00030002,
1478         0x3c08c, 0xffffffff, 0x00040007,
1479         0x3c090, 0xffffffff, 0x00060005,
1480         0x3c094, 0xffffffff, 0x00090008,
1481         0x3c098, 0xffffffff, 0x00010000,
1482         0x3c09c, 0xffffffff, 0x00030002,
1483         0x3c0a0, 0xffffffff, 0x00040007,
1484         0x3c0a4, 0xffffffff, 0x00060005,
1485         0x3c0a8, 0xffffffff, 0x00090008,
1486         0x3c0ac, 0xffffffff, 0x00010000,
1487         0x3c0b0, 0xffffffff, 0x00030002,
1488         0x3c0b4, 0xffffffff, 0x00040007,
1489         0x3c0b8, 0xffffffff, 0x00060005,
1490         0x3c0bc, 0xffffffff, 0x00090008,
1491         0x3c0c0, 0xffffffff, 0x00010000,
1492         0x3c0c4, 0xffffffff, 0x00030002,
1493         0x3c0c8, 0xffffffff, 0x00040007,
1494         0x3c0cc, 0xffffffff, 0x00060005,
1495         0x3c0d0, 0xffffffff, 0x00090008,
1496         0x3c0d4, 0xffffffff, 0x00010000,
1497         0x3c0d8, 0xffffffff, 0x00030002,
1498         0x3c0dc, 0xffffffff, 0x00040007,
1499         0x3c0e0, 0xffffffff, 0x00060005,
1500         0x3c0e4, 0xffffffff, 0x00090008,
1501         0x3c0e8, 0xffffffff, 0x00010000,
1502         0x3c0ec, 0xffffffff, 0x00030002,
1503         0x3c0f0, 0xffffffff, 0x00040007,
1504         0x3c0f4, 0xffffffff, 0x00060005,
1505         0x3c0f8, 0xffffffff, 0x00090008,
1506         0xc318, 0xffffffff, 0x00020200,
1507         0x3350, 0xffffffff, 0x00000200,
1508         0x15c0, 0xffffffff, 0x00000400,
1509         0x55e8, 0xffffffff, 0x00000000,
1510         0x2f50, 0xffffffff, 0x00000902,
1511         0x3c000, 0xffffffff, 0x96940200,
1512         0x8708, 0xffffffff, 0x00900100,
1513         0xc424, 0xffffffff, 0x0020003f,
1514         0x38, 0xffffffff, 0x0140001c,
1515         0x3c, 0x000f0000, 0x000f0000,
1516         0x220, 0xffffffff, 0xc060000c,
1517         0x224, 0xc0000fff, 0x00000100,
1518         0xf90, 0xffffffff, 0x00000100,
1519         0xf98, 0x00000101, 0x00000000,
1520         0x20a8, 0xffffffff, 0x00000104,
1521         0x55e4, 0xff000fff, 0x00000100,
1522         0x30cc, 0xc0000fff, 0x00000104,
1523         0xc1e4, 0x00000001, 0x00000001,
1524         0xd00c, 0xff000ff0, 0x00000100,
1525         0xd80c, 0xff000ff0, 0x00000100
1526 };
1527
1528 static const u32 godavari_golden_registers[] =
1529 {
1530         0x55e4, 0xff607fff, 0xfc000100,
1531         0x6ed8, 0x00010101, 0x00010000,
1532         0x9830, 0xffffffff, 0x00000000,
1533         0x98302, 0xf00fffff, 0x00000400,
1534         0x6130, 0xffffffff, 0x00010000,
1535         0x5bb0, 0x000000f0, 0x00000070,
1536         0x5bc0, 0xf0311fff, 0x80300000,
1537         0x98f8, 0x73773777, 0x12010001,
1538         0x98fc, 0xffffffff, 0x00000010,
1539         0x8030, 0x00001f0f, 0x0000100a,
1540         0x2f48, 0x73773777, 0x12010001,
1541         0x2408, 0x000fffff, 0x000c007f,
1542         0x8a14, 0xf000003f, 0x00000007,
1543         0x8b24, 0xffffffff, 0x00ff0fff,
1544         0x30a04, 0x0000ff0f, 0x00000000,
1545         0x28a4c, 0x07ffffff, 0x06000000,
1546         0x4d8, 0x00000fff, 0x00000100,
1547         0xd014, 0x00010000, 0x00810001,
1548         0xd814, 0x00010000, 0x00810001,
1549         0x3e78, 0x00000001, 0x00000002,
1550         0xc768, 0x00000008, 0x00000008,
1551         0xc770, 0x00000f00, 0x00000800,
1552         0xc774, 0x00000f00, 0x00000800,
1553         0xc798, 0x00ffffff, 0x00ff7fbf,
1554         0xc79c, 0x00ffffff, 0x00ff7faf,
1555         0x8c00, 0x000000ff, 0x00000001,
1556         0x214f8, 0x01ff01ff, 0x00000002,
1557         0x21498, 0x007ff800, 0x00200000,
1558         0x2015c, 0xffffffff, 0x00000f40,
1559         0x88c4, 0x001f3ae3, 0x00000082,
1560         0x88d4, 0x0000001f, 0x00000010,
1561         0x30934, 0xffffffff, 0x00000000
1562 };
1563
1564
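/*
 * The golden register tables above are {offset, and_mask, or_mask} triplets
 * consumed by radeon_program_register_sequence(): an and_mask of 0xffffffff
 * writes or_mask to the register directly, anything else is applied as a
 * read-modify-write.
 */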
1565 static void cik_init_golden_registers(struct radeon_device *rdev)
1566 {
1567         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1568         mutex_lock(&rdev->grbm_idx_mutex);
1569         switch (rdev->family) {
1570         case CHIP_BONAIRE:
1571                 radeon_program_register_sequence(rdev,
1572                                                  bonaire_mgcg_cgcg_init,
1573                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1574                 radeon_program_register_sequence(rdev,
1575                                                  bonaire_golden_registers,
1576                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1577                 radeon_program_register_sequence(rdev,
1578                                                  bonaire_golden_common_registers,
1579                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1580                 radeon_program_register_sequence(rdev,
1581                                                  bonaire_golden_spm_registers,
1582                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1583                 break;
1584         case CHIP_KABINI:
1585                 radeon_program_register_sequence(rdev,
1586                                                  kalindi_mgcg_cgcg_init,
1587                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1588                 radeon_program_register_sequence(rdev,
1589                                                  kalindi_golden_registers,
1590                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1591                 radeon_program_register_sequence(rdev,
1592                                                  kalindi_golden_common_registers,
1593                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1594                 radeon_program_register_sequence(rdev,
1595                                                  kalindi_golden_spm_registers,
1596                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1597                 break;
1598         case CHIP_MULLINS:
1599                 radeon_program_register_sequence(rdev,
1600                                                  kalindi_mgcg_cgcg_init,
1601                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1602                 radeon_program_register_sequence(rdev,
1603                                                  godavari_golden_registers,
1604                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1605                 radeon_program_register_sequence(rdev,
1606                                                  kalindi_golden_common_registers,
1607                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1608                 radeon_program_register_sequence(rdev,
1609                                                  kalindi_golden_spm_registers,
1610                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1611                 break;
1612         case CHIP_KAVERI:
1613                 radeon_program_register_sequence(rdev,
1614                                                  spectre_mgcg_cgcg_init,
1615                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1616                 radeon_program_register_sequence(rdev,
1617                                                  spectre_golden_registers,
1618                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1619                 radeon_program_register_sequence(rdev,
1620                                                  spectre_golden_common_registers,
1621                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1622                 radeon_program_register_sequence(rdev,
1623                                                  spectre_golden_spm_registers,
1624                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1625                 break;
1626         case CHIP_HAWAII:
1627                 radeon_program_register_sequence(rdev,
1628                                                  hawaii_mgcg_cgcg_init,
1629                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1630                 radeon_program_register_sequence(rdev,
1631                                                  hawaii_golden_registers,
1632                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1633                 radeon_program_register_sequence(rdev,
1634                                                  hawaii_golden_common_registers,
1635                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  hawaii_golden_spm_registers,
1638                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1639                 break;
1640         default:
1641                 break;
1642         }
1643         mutex_unlock(&rdev->grbm_idx_mutex);
1644 }
1645
1646 /**
1647  * cik_get_xclk - get the xclk
1648  *
1649  * @rdev: radeon_device pointer
1650  *
1651  * Returns the reference clock used by the gfx engine
1652  * (CIK).
1653  */
1654 u32 cik_get_xclk(struct radeon_device *rdev)
1655 {
1656         u32 reference_clock = rdev->clock.spll.reference_freq;
1657
1658         if (rdev->flags & RADEON_IS_IGP) {
1659                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1660                         return reference_clock / 2;
1661         } else {
1662                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1663                         return reference_clock / 4;
1664         }
1665         return reference_clock;
1666 }
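/*
 * Note: this backs the asic ->get_xclk() callback.  Like the other clock
 * fields filled in from the vbios, reference_freq is assumed to be in the
 * driver's usual 10 kHz units, so callers scale it accordingly.
 */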
1667
1668 /**
1669  * cik_mm_rdoorbell - read a doorbell dword
1670  *
1671  * @rdev: radeon_device pointer
1672  * @index: doorbell index
1673  *
1674  * Returns the value in the doorbell aperture at the
1675  * requested doorbell index (CIK).
1676  */
1677 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1678 {
1679         if (index < rdev->doorbell.num_doorbells) {
1680                 return readl(rdev->doorbell.ptr + index);
1681         } else {
1682                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1683                 return 0;
1684         }
1685 }
1686
1687 /**
1688  * cik_mm_wdoorbell - write a doorbell dword
1689  *
1690  * @rdev: radeon_device pointer
1691  * @index: doorbell index
1692  * @v: value to write
1693  *
1694  * Writes @v to the doorbell aperture at the
1695  * requested doorbell index (CIK).
1696  */
1697 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1698 {
1699         if (index < rdev->doorbell.num_doorbells) {
1700                 writel(v, rdev->doorbell.ptr + index);
1701         } else {
1702                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1703         }
1704 }
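/*
 * Illustrative sketch only: rings normally reach these helpers through the
 * RDOORBELL32()/WDOORBELL32() wrappers rather than calling them directly,
 * e.g. a compute ring wptr update is roughly:
 *
 *	WDOORBELL32(ring->doorbell_index, ring->wptr);
 */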
1705
1706 #define BONAIRE_IO_MC_REGS_SIZE 36
1707
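/*
 * Each row below is an {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair;
 * ci_mc_load_microcode() programs these pairs before uploading the MC ucode.
 */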
1708 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1709 {
1710         {0x00000070, 0x04400000},
1711         {0x00000071, 0x80c01803},
1712         {0x00000072, 0x00004004},
1713         {0x00000073, 0x00000100},
1714         {0x00000074, 0x00ff0000},
1715         {0x00000075, 0x34000000},
1716         {0x00000076, 0x08000014},
1717         {0x00000077, 0x00cc08ec},
1718         {0x00000078, 0x00000400},
1719         {0x00000079, 0x00000000},
1720         {0x0000007a, 0x04090000},
1721         {0x0000007c, 0x00000000},
1722         {0x0000007e, 0x4408a8e8},
1723         {0x0000007f, 0x00000304},
1724         {0x00000080, 0x00000000},
1725         {0x00000082, 0x00000001},
1726         {0x00000083, 0x00000002},
1727         {0x00000084, 0xf3e4f400},
1728         {0x00000085, 0x052024e3},
1729         {0x00000087, 0x00000000},
1730         {0x00000088, 0x01000000},
1731         {0x0000008a, 0x1c0a0000},
1732         {0x0000008b, 0xff010000},
1733         {0x0000008d, 0xffffefff},
1734         {0x0000008e, 0xfff3efff},
1735         {0x0000008f, 0xfff3efbf},
1736         {0x00000092, 0xf7ffffff},
1737         {0x00000093, 0xffffff7f},
1738         {0x00000095, 0x00101101},
1739         {0x00000096, 0x00000fff},
1740         {0x00000097, 0x00116fff},
1741         {0x00000098, 0x60010000},
1742         {0x00000099, 0x10010000},
1743         {0x0000009a, 0x00006000},
1744         {0x0000009b, 0x00001000},
1745         {0x0000009f, 0x00b48000}
1746 };
1747
1748 #define HAWAII_IO_MC_REGS_SIZE 22
1749
1750 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1751 {
1752         {0x0000007d, 0x40000000},
1753         {0x0000007e, 0x40180304},
1754         {0x0000007f, 0x0000ff00},
1755         {0x00000081, 0x00000000},
1756         {0x00000083, 0x00000800},
1757         {0x00000086, 0x00000000},
1758         {0x00000087, 0x00000100},
1759         {0x00000088, 0x00020100},
1760         {0x00000089, 0x00000000},
1761         {0x0000008b, 0x00040000},
1762         {0x0000008c, 0x00000100},
1763         {0x0000008e, 0xff010000},
1764         {0x00000090, 0xffffefff},
1765         {0x00000091, 0xfff3efff},
1766         {0x00000092, 0xfff3efbf},
1767         {0x00000093, 0xf7ffffff},
1768         {0x00000094, 0xffffff7f},
1769         {0x00000095, 0x00000fff},
1770         {0x00000096, 0x00116fff},
1771         {0x00000097, 0x60010000},
1772         {0x00000098, 0x10010000},
1773         {0x0000009f, 0x00c79000}
1774 };
1775
1776
1777 /**
1778  * cik_srbm_select - select specific register instances
1779  *
1780  * @rdev: radeon_device pointer
1781  * @me: selected ME (micro engine)
1782  * @pipe: pipe
1783  * @queue: queue
1784  * @vmid: VMID
1785  *
1786  * Switches the currently active register instances.  Some
1787  * registers are instanced per VMID, others are instanced per
1788  * me/pipe/queue combination.
1789  */
1790 static void cik_srbm_select(struct radeon_device *rdev,
1791                             u32 me, u32 pipe, u32 queue, u32 vmid)
1792 {
1793         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1794                              MEID(me & 0x3) |
1795                              VMID(vmid & 0xf) |
1796                              QUEUEID(queue & 0x7));
1797         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1798 }
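/*
 * Typical usage (sketch): callers bracket per-instance register accesses
 * with a select/restore pair while holding rdev->srbm_mutex, e.g.:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */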
1799
1800 /* ucode loading */
1801 /**
1802  * ci_mc_load_microcode - load MC ucode into the hw
1803  *
1804  * @rdev: radeon_device pointer
1805  *
1806  * Load the GDDR MC ucode into the hw (CIK).
1807  * Returns 0 on success, error on failure.
1808  */
1809 int ci_mc_load_microcode(struct radeon_device *rdev)
1810 {
1811         const __be32 *fw_data = NULL;
1812         const __le32 *new_fw_data = NULL;
1813         u32 running, blackout = 0, tmp;
1814         u32 *io_mc_regs = NULL;
1815         const __le32 *new_io_mc_regs = NULL;
1816         int i, regs_size, ucode_size;
1817
1818         if (!rdev->mc_fw)
1819                 return -EINVAL;
1820
1821         if (rdev->new_fw) {
1822                 const struct mc_firmware_header_v1_0 *hdr =
1823                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1824
1825                 radeon_ucode_print_mc_hdr(&hdr->header);
1826
1827                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1828                 new_io_mc_regs = (const __le32 *)
1829                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1830                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1831                 new_fw_data = (const __le32 *)
1832                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1833         } else {
1834                 ucode_size = rdev->mc_fw->size / 4;
1835
1836                 switch (rdev->family) {
1837                 case CHIP_BONAIRE:
1838                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1839                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1840                         break;
1841                 case CHIP_HAWAII:
1842                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1843                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1844                         break;
1845                 default:
1846                         return -EINVAL;
1847                 }
1848                 fw_data = (const __be32 *)rdev->mc_fw->data;
1849         }
1850
1851         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1852
1853         if (running == 0) {
1854                 if (running) {
1855                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1856                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1857                 }
1858
1859                 /* reset the engine and set to writable */
1860                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1861                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1862
1863                 /* load mc io regs */
1864                 for (i = 0; i < regs_size; i++) {
1865                         if (rdev->new_fw) {
1866                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1867                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1868                         } else {
1869                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1870                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1871                         }
1872                 }
1873
1874                 tmp = RREG32(MC_SEQ_MISC0);
1875                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1876                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1877                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1878                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1879                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1880                 }
1881
1882                 /* load the MC ucode */
1883                 for (i = 0; i < ucode_size; i++) {
1884                         if (rdev->new_fw)
1885                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1886                         else
1887                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1888                 }
1889
1890                 /* put the engine back into the active state */
1891                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1892                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1893                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1894
1895                 /* wait for training to complete */
1896                 for (i = 0; i < rdev->usec_timeout; i++) {
1897                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1898                                 break;
1899                         udelay(1);
1900                 }
1901                 for (i = 0; i < rdev->usec_timeout; i++) {
1902                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1903                                 break;
1904                         udelay(1);
1905                 }
1906
1907                 if (running)
1908                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1909         }
1910
1911         return 0;
1912 }
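/*
 * Note: MC firmware only exists for dGPUs (see the RADEON_IS_IGP check in
 * cik_init_microcode()), so this is expected to be called from the CIK
 * startup/resume path for discrete cards only; on APUs mc_fw is NULL and
 * this simply returns -EINVAL.
 */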
1913
1914 /**
1915  * cik_init_microcode - load ucode images from disk
1916  *
1917  * @rdev: radeon_device pointer
1918  *
1919  * Use the firmware interface to load the ucode images into
1920  * the driver (they are not yet loaded into the hw).
1921  * Returns 0 on success, error on failure.
1922  */
1923 static int cik_init_microcode(struct radeon_device *rdev)
1924 {
1925         const char *chip_name;
1926         const char *new_chip_name;
1927         size_t pfp_req_size, me_req_size, ce_req_size,
1928                 mec_req_size, rlc_req_size, mc_req_size = 0,
1929                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1930         char fw_name[30];
1931         int new_fw = 0;
1932         int err;
1933         int num_fw;
1934
1935         DRM_DEBUG("\n");
1936
1937         switch (rdev->family) {
1938         case CHIP_BONAIRE:
1939                 chip_name = "BONAIRE";
1940                 new_chip_name = "bonaire";
1941                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1942                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1943                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1944                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1945                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1946                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1947                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1948                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1949                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1950                 num_fw = 8;
1951                 break;
1952         case CHIP_HAWAII:
1953                 chip_name = "HAWAII";
1954                 new_chip_name = "hawaii";
1955                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1956                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1957                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1958                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1959                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1960                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1961                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1962                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1963                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1964                 num_fw = 8;
1965                 break;
1966         case CHIP_KAVERI:
1967                 chip_name = "KAVERI";
1968                 new_chip_name = "kaveri";
1969                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1970                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1971                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1972                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1973                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1974                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1975                 num_fw = 7;
1976                 break;
1977         case CHIP_KABINI:
1978                 chip_name = "KABINI";
1979                 new_chip_name = "kabini";
1980                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1981                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1982                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1983                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1984                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1985                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1986                 num_fw = 6;
1987                 break;
1988         case CHIP_MULLINS:
1989                 chip_name = "MULLINS";
1990                 new_chip_name = "mullins";
1991                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1992                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1993                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1994                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1995                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1996                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997                 num_fw = 6;
1998                 break;
1999         default: BUG();
2000         }
2001
2002         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2003
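	/*
	 * For each block the new lowercase firmware name (with a validated
	 * common header) is tried first; on failure we fall back to the
	 * legacy uppercase name and only sanity-check its raw size.  new_fw
	 * counts the new-format images found so that mixing the two styles
	 * can be rejected at the end.
	 */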
2004         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2005         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2006         if (err) {
2007                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2008                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2009                 if (err)
2010                         goto out;
2011                 if (rdev->pfp_fw->size != pfp_req_size) {
2012                         printk(KERN_ERR
2013                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2014                                rdev->pfp_fw->size, fw_name);
2015                         err = -EINVAL;
2016                         goto out;
2017                 }
2018         } else {
2019                 err = radeon_ucode_validate(rdev->pfp_fw);
2020                 if (err) {
2021                         printk(KERN_ERR
2022                                "cik_fw: validation failed for firmware \"%s\"\n",
2023                                fw_name);
2024                         goto out;
2025                 } else {
2026                         new_fw++;
2027                 }
2028         }
2029
2030         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2031         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2032         if (err) {
2033                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2034                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2035                 if (err)
2036                         goto out;
2037                 if (rdev->me_fw->size != me_req_size) {
2038                         printk(KERN_ERR
2039                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2040                                rdev->me_fw->size, fw_name);
2041                         err = -EINVAL;
2042                 }
2043         } else {
2044                 err = radeon_ucode_validate(rdev->me_fw);
2045                 if (err) {
2046                         printk(KERN_ERR
2047                                "cik_fw: validation failed for firmware \"%s\"\n",
2048                                fw_name);
2049                         goto out;
2050                 } else {
2051                         new_fw++;
2052                 }
2053         }
2054
2055         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2056         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2057         if (err) {
2058                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2059                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2060                 if (err)
2061                         goto out;
2062                 if (rdev->ce_fw->size != ce_req_size) {
2063                         printk(KERN_ERR
2064                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2065                                rdev->ce_fw->size, fw_name);
2066                         err = -EINVAL;
2067                 }
2068         } else {
2069                 err = radeon_ucode_validate(rdev->ce_fw);
2070                 if (err) {
2071                         printk(KERN_ERR
2072                                "cik_fw: validation failed for firmware \"%s\"\n",
2073                                fw_name);
2074                         goto out;
2075                 } else {
2076                         new_fw++;
2077                 }
2078         }
2079
2080         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2081         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2082         if (err) {
2083                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2084                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2085                 if (err)
2086                         goto out;
2087                 if (rdev->mec_fw->size != mec_req_size) {
2088                         printk(KERN_ERR
2089                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2090                                rdev->mec_fw->size, fw_name);
2091                         err = -EINVAL;
2092                 }
2093         } else {
2094                 err = radeon_ucode_validate(rdev->mec_fw);
2095                 if (err) {
2096                         printk(KERN_ERR
2097                                "cik_fw: validation failed for firmware \"%s\"\n",
2098                                fw_name);
2099                         goto out;
2100                 } else {
2101                         new_fw++;
2102                 }
2103         }
2104
2105         if (rdev->family == CHIP_KAVERI) {
2106                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2107                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2108                 if (err) {
2109                         goto out;
2110                 } else {
2111                         err = radeon_ucode_validate(rdev->mec2_fw);
2112                         if (err) {
2113                                 goto out;
2114                         } else {
2115                                 new_fw++;
2116                         }
2117                 }
2118         }
2119
2120         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2121         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2122         if (err) {
2123                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2124                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2125                 if (err)
2126                         goto out;
2127                 if (rdev->rlc_fw->size != rlc_req_size) {
2128                         printk(KERN_ERR
2129                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2130                                rdev->rlc_fw->size, fw_name);
2131                         err = -EINVAL;
2132                 }
2133         } else {
2134                 err = radeon_ucode_validate(rdev->rlc_fw);
2135                 if (err) {
2136                         printk(KERN_ERR
2137                                "cik_fw: validation failed for firmware \"%s\"\n",
2138                                fw_name);
2139                         goto out;
2140                 } else {
2141                         new_fw++;
2142                 }
2143         }
2144
2145         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2146         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2147         if (err) {
2148                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2149                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2150                 if (err)
2151                         goto out;
2152                 if (rdev->sdma_fw->size != sdma_req_size) {
2153                         printk(KERN_ERR
2154                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2155                                rdev->sdma_fw->size, fw_name);
2156                         err = -EINVAL;
2157                 }
2158         } else {
2159                 err = radeon_ucode_validate(rdev->sdma_fw);
2160                 if (err) {
2161                         printk(KERN_ERR
2162                                "cik_fw: validation failed for firmware \"%s\"\n",
2163                                fw_name);
2164                         goto out;
2165                 } else {
2166                         new_fw++;
2167                 }
2168         }
2169
2170         /* No SMC, MC ucode on APUs */
2171         if (!(rdev->flags & RADEON_IS_IGP)) {
2172                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2173                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2174                 if (err) {
2175                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2176                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2177                         if (err) {
2178                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2179                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2180                                 if (err)
2181                                         goto out;
2182                         }
2183                         if ((rdev->mc_fw->size != mc_req_size) &&
2184                             (rdev->mc_fw->size != mc2_req_size)){
2185                                 printk(KERN_ERR
2186                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2187                                        rdev->mc_fw->size, fw_name);
2188                                 err = -EINVAL;
2189                         }
2190                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2191                 } else {
2192                         err = radeon_ucode_validate(rdev->mc_fw);
2193                         if (err) {
2194                                 printk(KERN_ERR
2195                                        "cik_fw: validation failed for firmware \"%s\"\n",
2196                                        fw_name);
2197                                 goto out;
2198                         } else {
2199                                 new_fw++;
2200                         }
2201                 }
2202
2203                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2204                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2205                 if (err) {
2206                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2207                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2208                         if (err) {
2209                                 printk(KERN_ERR
2210                                        "smc: error loading firmware \"%s\"\n",
2211                                        fw_name);
2212                                 release_firmware(rdev->smc_fw);
2213                                 rdev->smc_fw = NULL;
2214                                 err = 0;
2215                         } else if (rdev->smc_fw->size != smc_req_size) {
2216                                 printk(KERN_ERR
2217                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2218                                        rdev->smc_fw->size, fw_name);
2219                                 err = -EINVAL;
2220                         }
2221                 } else {
2222                         err = radeon_ucode_validate(rdev->smc_fw);
2223                         if (err) {
2224                                 printk(KERN_ERR
2225                                        "cik_fw: validation failed for firmware \"%s\"\n",
2226                                        fw_name);
2227                                 goto out;
2228                         } else {
2229                                 new_fw++;
2230                         }
2231                 }
2232         }
2233
2234         if (new_fw == 0) {
2235                 rdev->new_fw = false;
2236         } else if (new_fw < num_fw) {
2237                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2238                 err = -EINVAL;
2239         } else {
2240                 rdev->new_fw = true;
2241         }
2242
2243 out:
2244         if (err) {
2245                 if (err != -EINVAL)
2246                         printk(KERN_ERR
2247                                "cik_cp: Failed to load firmware \"%s\"\n",
2248                                fw_name);
2249                 release_firmware(rdev->pfp_fw);
2250                 rdev->pfp_fw = NULL;
2251                 release_firmware(rdev->me_fw);
2252                 rdev->me_fw = NULL;
2253                 release_firmware(rdev->ce_fw);
2254                 rdev->ce_fw = NULL;
2255                 release_firmware(rdev->mec_fw);
2256                 rdev->mec_fw = NULL;
2257                 release_firmware(rdev->mec2_fw);
2258                 rdev->mec2_fw = NULL;
2259                 release_firmware(rdev->rlc_fw);
2260                 rdev->rlc_fw = NULL;
2261                 release_firmware(rdev->sdma_fw);
2262                 rdev->sdma_fw = NULL;
2263                 release_firmware(rdev->mc_fw);
2264                 rdev->mc_fw = NULL;
2265                 release_firmware(rdev->smc_fw);
2266                 rdev->smc_fw = NULL;
2267         }
2268         return err;
2269 }
2270
2271 /*
2272  * Core functions
2273  */
2274 /**
2275  * cik_tiling_mode_table_init - init the hw tiling table
2276  *
2277  * @rdev: radeon_device pointer
2278  *
2279  * Starting with SI, the tiling setup is done globally in a
2280  * set of 32 tiling modes.  Rather than selecting each set of
2281  * parameters per surface as on older asics, we just select
2282  * which index in the tiling table we want to use, and the
2283  * surface uses those parameters (CIK).
2284  */
2285 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2286 {
2287         const u32 num_tile_mode_states = 32;
2288         const u32 num_secondary_tile_mode_states = 16;
2289         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2290         u32 num_pipe_configs;
2291         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2292                 rdev->config.cik.max_shader_engines;
2293
2294         switch (rdev->config.cik.mem_row_size_in_kb) {
2295         case 1:
2296                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2297                 break;
2298         case 2:
2299         default:
2300                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2301                 break;
2302         case 4:
2303                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2304                 break;
2305         }
2306
2307         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2308         if (num_pipe_configs > 8)
2309                 num_pipe_configs = 16;
2310
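	/*
	 * Anything above 8 tile pipes is treated as the 16-pipe layout
	 * below; among the CIK parts that is assumed to only be Hawaii.
	 */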
2311         if (num_pipe_configs == 16) {
2312                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2313                         switch (reg_offset) {
2314                         case 0:
2315                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2317                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2319                                 break;
2320                         case 1:
2321                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2323                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2325                                 break;
2326                         case 2:
2327                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2331                                 break;
2332                         case 3:
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2337                                 break;
2338                         case 4:
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                                  TILE_SPLIT(split_equal_to_row_size));
2343                                 break;
2344                         case 5:
2345                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2346                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348                                 break;
2349                         case 6:
2350                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2351                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2352                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2354                                 break;
2355                         case 7:
2356                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2357                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2358                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                                  TILE_SPLIT(split_equal_to_row_size));
2360                                 break;
2361                         case 8:
2362                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2363                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2364                                 break;
2365                         case 9:
2366                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2369                                 break;
2370                         case 10:
2371                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2372                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375                                 break;
2376                         case 11:
2377                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2380                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                                 break;
2382                         case 12:
2383                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2384                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387                                 break;
2388                         case 13:
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2392                                 break;
2393                         case 14:
2394                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398                                 break;
2399                         case 16:
2400                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2402                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2403                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404                                 break;
2405                         case 17:
2406                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2407                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                                 break;
2411                         case 27:
2412                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2415                                 break;
2416                         case 28:
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                                 break;
2422                         case 29:
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2426                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                                 break;
2428                         case 30:
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                                 break;
2434                         default:
2435                                 gb_tile_moden = 0;
2436                                 break;
2437                         }
2438                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2439                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2440                 }
2441                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2442                         switch (reg_offset) {
2443                         case 0:
2444                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2448                                 break;
2449                         case 1:
2450                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2454                                 break;
2455                         case 2:
2456                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2460                                 break;
2461                         case 3:
2462                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2466                                 break;
2467                         case 4:
2468                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2472                                 break;
2473                         case 5:
2474                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2478                                 break;
2479                         case 6:
2480                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2484                                 break;
2485                         case 8:
2486                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2490                                 break;
2491                         case 9:
2492                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2496                                 break;
2497                         case 10:
2498                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2502                                 break;
2503                         case 11:
2504                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2508                                 break;
2509                         case 12:
2510                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2513                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2514                                 break;
2515                         case 13:
2516                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2520                                 break;
2521                         case 14:
2522                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2525                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2526                                 break;
2527                         default:
2528                                 gb_tile_moden = 0;
2529                                 break;
2530                         }
2531                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2532                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2533                 }
2534         } else if (num_pipe_configs == 8) {
2535                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2536                         switch (reg_offset) {
2537                         case 0:
2538                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2540                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2542                                 break;
2543                         case 1:
2544                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2548                                 break;
2549                         case 2:
2550                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2552                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2554                                 break;
2555                         case 3:
2556                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2558                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2560                                 break;
2561                         case 4:
2562                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2564                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                                                  TILE_SPLIT(split_equal_to_row_size));
2566                                 break;
2567                         case 5:
2568                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2569                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571                                 break;
2572                         case 6:
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2577                                 break;
2578                         case 7:
2579                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2580                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2581                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                                  TILE_SPLIT(split_equal_to_row_size));
2583                                 break;
2584                         case 8:
2585                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2587                                 break;
2588                         case 9:
2589                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2590                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2592                                 break;
2593                         case 10:
2594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598                                 break;
2599                         case 11:
2600                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2601                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2602                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2603                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604                                 break;
2605                         case 12:
2606                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2607                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610                                 break;
2611                         case 13:
2612                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2615                                 break;
2616                         case 14:
2617                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2619                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                                 break;
2622                         case 16:
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627                                 break;
2628                         case 17:
2629                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2630                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633                                 break;
2634                         case 27:
2635                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2638                                 break;
2639                         case 28:
2640                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2642                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                 break;
2645                         case 29:
2646                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                 break;
2651                         case 30:
2652                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                 break;
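                        /* tile mode indices not listed here are unused and are programmed to 0 */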
2657                         default:
2658                                 gb_tile_moden = 0;
2659                                 break;
2660                         }
2661                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2662                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2663                 }
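                /* macro tile (bank) parameters: bank width/height, aspect ratio and bank count */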
2664                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2665                         switch (reg_offset) {
2666                         case 0:
2667                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2669                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2671                                 break;
2672                         case 1:
2673                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2675                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2676                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2677                                 break;
2678                         case 2:
2679                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2682                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2683                                 break;
2684                         case 3:
2685                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2689                                 break;
2690                         case 4:
2691                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2694                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2695                                 break;
2696                         case 5:
2697                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2700                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2701                                 break;
2702                         case 6:
2703                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2706                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2707                                 break;
2708                         case 8:
2709                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2711                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2712                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2713                                 break;
2714                         case 9:
2715                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2719                                 break;
2720                         case 10:
2721                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2723                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2724                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2725                                 break;
2726                         case 11:
2727                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2730                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2731                                 break;
2732                         case 12:
2733                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2736                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2737                                 break;
2738                         case 13:
2739                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2742                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2743                                 break;
2744                         case 14:
2745                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2748                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2749                                 break;
2750                         default:
2751                                 gb_tile_moden = 0;
2752                                 break;
2753                         }
2754                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2755                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2756                 }
2757         } else if (num_pipe_configs == 4) {
2758                 if (num_rbs == 4) {
2759                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2760                                 switch (reg_offset) {
2761                                 case 0:
2762                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2764                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2766                                         break;
2767                                 case 1:
2768                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2772                                         break;
2773                                 case 2:
2774                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2776                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2778                                         break;
2779                                 case 3:
2780                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2782                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2784                                         break;
2785                                 case 4:
2786                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2788                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                                          TILE_SPLIT(split_equal_to_row_size));
2790                                         break;
2791                                 case 5:
2792                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2793                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2795                                         break;
2796                                 case 6:
2797                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2798                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2799                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2801                                         break;
2802                                 case 7:
2803                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2805                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806                                                          TILE_SPLIT(split_equal_to_row_size));
2807                                         break;
2808                                 case 8:
2809                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2810                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2811                                         break;
2812                                 case 9:
2813                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2816                                         break;
2817                                 case 10:
2818                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2820                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                                         break;
2823                                 case 11:
2824                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2826                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2828                                         break;
2829                                 case 12:
2830                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2831                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834                                         break;
2835                                 case 13:
2836                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2837                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2839                                         break;
2840                                 case 14:
2841                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845                                         break;
2846                                 case 16:
2847                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2848                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                                         break;
2852                                 case 17:
2853                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2854                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857                                         break;
2858                                 case 27:
2859                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2862                                         break;
2863                                 case 28:
2864                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868                                         break;
2869                                 case 29:
2870                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874                                         break;
2875                                 case 30:
2876                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2877                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880                                         break;
2881                                 default:
2882                                         gb_tile_moden = 0;
2883                                         break;
2884                                 }
2885                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2886                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2887                         }
2888                 } else if (num_rbs < 4) {
2889                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2890                                 switch (reg_offset) {
2891                                 case 0:
2892                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2895                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896                                         break;
2897                                 case 1:
2898                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2900                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2901                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2902                                         break;
2903                                 case 2:
2904                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                                         break;
2909                                 case 3:
2910                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2912                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2913                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2914                                         break;
2915                                 case 4:
2916                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2918                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2919                                                          TILE_SPLIT(split_equal_to_row_size));
2920                                         break;
2921                                 case 5:
2922                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2923                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2924                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925                                         break;
2926                                 case 6:
2927                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2929                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2930                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2931                                         break;
2932                                 case 7:
2933                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2934                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2935                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2936                                                          TILE_SPLIT(split_equal_to_row_size));
2937                                         break;
2938                                 case 8:
2939                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2941                                         break;
2942                                 case 9:
2943                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2945                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2946                                         break;
2947                                 case 10:
2948                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2949                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2951                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952                                         break;
2953                                 case 11:
2954                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2955                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2957                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                                         break;
2959                                 case 12:
2960                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2961                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2962                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2963                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964                                         break;
2965                                 case 13:
2966                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2967                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2968                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2969                                         break;
2970                                 case 14:
2971                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2974                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975                                         break;
2976                                 case 16:
2977                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2978                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2979                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2980                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981                                         break;
2982                                 case 17:
2983                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2984                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2985                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2986                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987                                         break;
2988                                 case 27:
2989                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2990                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2992                                         break;
2993                                 case 28:
2994                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998                                         break;
2999                                 case 29:
3000                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3001                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3002                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3003                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3004                                         break;
3005                                 case 30:
3006                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3007                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3009                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010                                         break;
3011                                 default:
3012                                         gb_tile_moden = 0;
3013                                         break;
3014                                 }
3015                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3016                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3017                         }
3018                 }
3019                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3020                         switch (reg_offset) {
3021                         case 0:
3022                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3024                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3025                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3026                                 break;
3027                         case 1:
3028                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3032                                 break;
3033                         case 2:
3034                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3036                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3037                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3038                                 break;
3039                         case 3:
3040                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3043                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3044                                 break;
3045                         case 4:
3046                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3050                                 break;
3051                         case 5:
3052                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3055                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3056                                 break;
3057                         case 6:
3058                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3061                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3062                                 break;
3063                         case 8:
3064                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3065                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3066                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3068                                 break;
3069                         case 9:
3070                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3071                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3074                                 break;
3075                         case 10:
3076                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3078                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3079                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3080                                 break;
3081                         case 11:
3082                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3084                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3086                                 break;
3087                         case 12:
3088                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3091                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3092                                 break;
3093                         case 13:
3094                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3096                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3097                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3098                                 break;
3099                         case 14:
3100                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3102                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3103                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3104                                 break;
3105                         default:
3106                                 gb_tile_moden = 0;
3107                                 break;
3108                         }
3109                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3110                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3111                 }
3112         } else if (num_pipe_configs == 2) {
3113                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3114                         switch (reg_offset) {
3115                         case 0:
3116                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3118                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3119                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3120                                 break;
3121                         case 1:
3122                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3123                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3124                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3126                                 break;
3127                         case 2:
3128                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3129                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3130                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3131                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3132                                 break;
3133                         case 3:
3134                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3136                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3138                                 break;
3139                         case 4:
3140                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3142                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                                  TILE_SPLIT(split_equal_to_row_size));
3144                                 break;
3145                         case 5:
3146                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3147                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3149                                 break;
3150                         case 6:
3151                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3152                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3153                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3155                                 break;
3156                         case 7:
3157                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3158                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3159                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                                  TILE_SPLIT(split_equal_to_row_size));
3161                                 break;
3162                         case 8:
3163                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3164                                                  PIPE_CONFIG(ADDR_SURF_P2));
3165                                 break;
3166                         case 9:
3167                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3168                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3169                                                  PIPE_CONFIG(ADDR_SURF_P2));
3170                                 break;
3171                         case 10:
3172                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3173                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3174                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3176                                 break;
3177                         case 11:
3178                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3179                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3180                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3182                                 break;
3183                         case 12:
3184                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3185                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3186                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3187                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3188                                 break;
3189                         case 13:
3190                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3191                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3192                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3193                                 break;
3194                         case 14:
3195                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3197                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199                                 break;
3200                         case 16:
3201                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3203                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3204                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205                                 break;
3206                         case 17:
3207                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3208                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3209                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3210                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211                                 break;
3212                         case 27:
3213                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3214                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3215                                                  PIPE_CONFIG(ADDR_SURF_P2));
3216                                 break;
3217                         case 28:
3218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3220                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222                                 break;
3223                         case 29:
3224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3226                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3227                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3228                                 break;
3229                         case 30:
3230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3233                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3234                                 break;
3235                         default:
3236                                 gb_tile_moden = 0;
3237                                 break;
3238                         }
3239                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3240                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3241                 }
3242                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3243                         switch (reg_offset) {
3244                         case 0:
3245                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3247                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                                 break;
3250                         case 1:
3251                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3252                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3253                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3255                                 break;
3256                         case 2:
3257                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3259                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3260                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3261                                 break;
3262                         case 3:
3263                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3264                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3265                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3266                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3267                                 break;
3268                         case 4:
3269                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3271                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3273                                 break;
3274                         case 5:
3275                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3276                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3277                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3278                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3279                                 break;
3280                         case 6:
3281                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3285                                 break;
3286                         case 8:
3287                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3288                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3289                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3290                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3291                                 break;
3292                         case 9:
3293                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3295                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3297                                 break;
3298                         case 10:
3299                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3300                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3301                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3302                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3303                                 break;
3304                         case 11:
3305                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3306                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3309                                 break;
3310                         case 12:
3311                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3312                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3313                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3315                                 break;
3316                         case 13:
3317                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3321                                 break;
3322                         case 14:
3323                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3327                                 break;
3328                         default:
3329                                 gb_tile_moden = 0;
3330                                 break;
3331                         }
3332                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3333                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3334                 }
3335         } else
3336                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3337 }
3338
3339 /**
3340  * cik_select_se_sh - select which SE, SH to address
3341  *
3342  * @rdev: radeon_device pointer
3343  * @se_num: shader engine to address
3344  * @sh_num: sh block to address
3345  *
3346  * Select which SE, SH combinations to address. Certain
3347  * registers are instanced per SE or SH.  0xffffffff means
3348  * broadcast to all SEs or SHs (CIK).
3349  */
3350 static void cik_select_se_sh(struct radeon_device *rdev,
3351                              u32 se_num, u32 sh_num)
3352 {
3353         u32 data = INSTANCE_BROADCAST_WRITES;
3354
3355         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3356                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3357         else if (se_num == 0xffffffff)
3358                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3359         else if (sh_num == 0xffffffff)
3360                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3361         else
3362                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3363         WREG32(GRBM_GFX_INDEX, data);
3364 }
3365
3366 /**
3367  * cik_create_bitmask - create a bitmask
3368  *
3369  * @bit_width: length of the mask
3370  *
3371  * create a variable length bit mask (CIK).
3372  * Returns the bitmask.
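 * For example, a bit_width of 4 yields the mask 0xf.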
3373  */
3374 static u32 cik_create_bitmask(u32 bit_width)
3375 {
3376         u32 i, mask = 0;
3377
3378         for (i = 0; i < bit_width; i++) {
3379                 mask <<= 1;
3380                 mask |= 1;
3381         }
3382         return mask;
3383 }
3384
3385 /**
3386  * cik_get_rb_disabled - computes the mask of disabled RBs
3387  *
3388  * @rdev: radeon_device pointer
3389  * @max_rb_num_per_se: max RBs (render backends) per SE
3390  * @sh_per_se: number of SH blocks per SE for the asic
3392  *
3393  * Calculates the bitmask of disabled RBs (CIK).
3394  * Returns the disabled RB bitmask.
3395  */
3396 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3397                               u32 max_rb_num_per_se,
3398                               u32 sh_per_se)
3399 {
3400         u32 data, mask;
3401
3402         data = RREG32(CC_RB_BACKEND_DISABLE);
3403         if (data & 1)
3404                 data &= BACKEND_DISABLE_MASK;
3405         else
3406                 data = 0;
3407         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3408
3409         data >>= BACKEND_DISABLE_SHIFT;
3410
3411         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3412
3413         return data & mask;
3414 }
3415
3416 /**
3417  * cik_setup_rb - setup the RBs on the asic
3418  *
3419  * @rdev: radeon_device pointer
3420  * @se_num: number of SEs (shader engines) for the asic
3421  * @sh_per_se: number of SH blocks per SE for the asic
3422  * @max_rb_num_per_se: max RBs (render backends) per SE
3423  *
3424  * Configures per-SE/SH RB registers (CIK).
3425  */
3426 static void cik_setup_rb(struct radeon_device *rdev,
3427                          u32 se_num, u32 sh_per_se,
3428                          u32 max_rb_num_per_se)
3429 {
3430         int i, j;
3431         u32 data, mask;
3432         u32 disabled_rbs = 0;
3433         u32 enabled_rbs = 0;
3434
3435         mutex_lock(&rdev->grbm_idx_mutex);
3436         for (i = 0; i < se_num; i++) {
3437                 for (j = 0; j < sh_per_se; j++) {
3438                         cik_select_se_sh(rdev, i, j);
3439                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3440                         if (rdev->family == CHIP_HAWAII)
3441                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3442                         else
3443                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3444                 }
3445         }
3446         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3447         mutex_unlock(&rdev->grbm_idx_mutex);
3448
3449         mask = 1;
3450         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3451                 if (!(disabled_rbs & mask))
3452                         enabled_rbs |= mask;
3453                 mask <<= 1;
3454         }
3455
3456         rdev->config.cik.backend_enable_mask = enabled_rbs;
3457
3458         mutex_lock(&rdev->grbm_idx_mutex);
3459         for (i = 0; i < se_num; i++) {
3460                 cik_select_se_sh(rdev, i, 0xffffffff);
3461                 data = 0;
3462                 for (j = 0; j < sh_per_se; j++) {
3463                         switch (enabled_rbs & 3) {
3464                         case 0:
3465                                 if (j == 0)
3466                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3467                                 else
3468                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3469                                 break;
3470                         case 1:
3471                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3472                                 break;
3473                         case 2:
3474                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3475                                 break;
3476                         case 3:
3477                         default:
3478                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3479                                 break;
3480                         }
3481                         enabled_rbs >>= 2;
3482                 }
3483                 WREG32(PA_SC_RASTER_CONFIG, data);
3484         }
3485         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3486         mutex_unlock(&rdev->grbm_idx_mutex);
3487 }
3488
3489 /**
3490  * cik_gpu_init - setup the 3D engine
3491  *
3492  * @rdev: radeon_device pointer
3493  *
3494  * Configures the 3D engine and tiling configuration
3495  * registers so that the 3D engine is usable.
3496  */
3497 static void cik_gpu_init(struct radeon_device *rdev)
3498 {
3499         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3500         u32 mc_shared_chmap, mc_arb_ramcfg;
3501         u32 hdp_host_path_cntl;
3502         u32 tmp;
3503         int i, j;
3504
3505         switch (rdev->family) {
3506         case CHIP_BONAIRE:
3507                 rdev->config.cik.max_shader_engines = 2;
3508                 rdev->config.cik.max_tile_pipes = 4;
3509                 rdev->config.cik.max_cu_per_sh = 7;
3510                 rdev->config.cik.max_sh_per_se = 1;
3511                 rdev->config.cik.max_backends_per_se = 2;
3512                 rdev->config.cik.max_texture_channel_caches = 4;
3513                 rdev->config.cik.max_gprs = 256;
3514                 rdev->config.cik.max_gs_threads = 32;
3515                 rdev->config.cik.max_hw_contexts = 8;
3516
3517                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3522                 break;
3523         case CHIP_HAWAII:
3524                 rdev->config.cik.max_shader_engines = 4;
3525                 rdev->config.cik.max_tile_pipes = 16;
3526                 rdev->config.cik.max_cu_per_sh = 11;
3527                 rdev->config.cik.max_sh_per_se = 1;
3528                 rdev->config.cik.max_backends_per_se = 4;
3529                 rdev->config.cik.max_texture_channel_caches = 16;
3530                 rdev->config.cik.max_gprs = 256;
3531                 rdev->config.cik.max_gs_threads = 32;
3532                 rdev->config.cik.max_hw_contexts = 8;
3533
3534                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3535                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3536                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3537                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3538                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3539                 break;
3540         case CHIP_KAVERI:
3541                 rdev->config.cik.max_shader_engines = 1;
3542                 rdev->config.cik.max_tile_pipes = 4;
3543                 if ((rdev->pdev->device == 0x1304) ||
3544                     (rdev->pdev->device == 0x1305) ||
3545                     (rdev->pdev->device == 0x130C) ||
3546                     (rdev->pdev->device == 0x130F) ||
3547                     (rdev->pdev->device == 0x1310) ||
3548                     (rdev->pdev->device == 0x1311) ||
3549                     (rdev->pdev->device == 0x131C)) {
3550                         rdev->config.cik.max_cu_per_sh = 8;
3551                         rdev->config.cik.max_backends_per_se = 2;
3552                 } else if ((rdev->pdev->device == 0x1309) ||
3553                            (rdev->pdev->device == 0x130A) ||
3554                            (rdev->pdev->device == 0x130D) ||
3555                            (rdev->pdev->device == 0x1313) ||
3556                            (rdev->pdev->device == 0x131D)) {
3557                         rdev->config.cik.max_cu_per_sh = 6;
3558                         rdev->config.cik.max_backends_per_se = 2;
3559                 } else if ((rdev->pdev->device == 0x1306) ||
3560                            (rdev->pdev->device == 0x1307) ||
3561                            (rdev->pdev->device == 0x130B) ||
3562                            (rdev->pdev->device == 0x130E) ||
3563                            (rdev->pdev->device == 0x1315) ||
3564                            (rdev->pdev->device == 0x1318) ||
3565                            (rdev->pdev->device == 0x131B)) {
3566                         rdev->config.cik.max_cu_per_sh = 4;
3567                         rdev->config.cik.max_backends_per_se = 1;
3568                 } else {
3569                         rdev->config.cik.max_cu_per_sh = 3;
3570                         rdev->config.cik.max_backends_per_se = 1;
3571                 }
3572                 rdev->config.cik.max_sh_per_se = 1;
3573                 rdev->config.cik.max_texture_channel_caches = 4;
3574                 rdev->config.cik.max_gprs = 256;
3575                 rdev->config.cik.max_gs_threads = 16;
3576                 rdev->config.cik.max_hw_contexts = 8;
3577
3578                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3579                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3580                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3581                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3582                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3583                 break;
3584         case CHIP_KABINI:
3585         case CHIP_MULLINS:
3586         default:
3587                 rdev->config.cik.max_shader_engines = 1;
3588                 rdev->config.cik.max_tile_pipes = 2;
3589                 rdev->config.cik.max_cu_per_sh = 2;
3590                 rdev->config.cik.max_sh_per_se = 1;
3591                 rdev->config.cik.max_backends_per_se = 1;
3592                 rdev->config.cik.max_texture_channel_caches = 2;
3593                 rdev->config.cik.max_gprs = 256;
3594                 rdev->config.cik.max_gs_threads = 16;
3595                 rdev->config.cik.max_hw_contexts = 8;
3596
3597                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3598                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3599                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3600                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3601                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3602                 break;
3603         }
3604
3605         /* Initialize HDP */
3606         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3607                 WREG32((0x2c14 + j), 0x00000000);
3608                 WREG32((0x2c18 + j), 0x00000000);
3609                 WREG32((0x2c1c + j), 0x00000000);
3610                 WREG32((0x2c20 + j), 0x00000000);
3611                 WREG32((0x2c24 + j), 0x00000000);
3612         }
3613
3614         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3615
3616         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3617
3618         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3619         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3620
3621         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3622         rdev->config.cik.mem_max_burst_length_bytes = 256;
3623         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3624         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3625         if (rdev->config.cik.mem_row_size_in_kb > 4)
3626                 rdev->config.cik.mem_row_size_in_kb = 4;
3627         /* XXX use MC settings? */
3628         rdev->config.cik.shader_engine_tile_size = 32;
3629         rdev->config.cik.num_gpus = 1;
3630         rdev->config.cik.multi_gpu_tile_size = 64;
3631
3632         /* fix up row size */
3633         gb_addr_config &= ~ROW_SIZE_MASK;
3634         switch (rdev->config.cik.mem_row_size_in_kb) {
3635         case 1:
3636         default:
3637                 gb_addr_config |= ROW_SIZE(0);
3638                 break;
3639         case 2:
3640                 gb_addr_config |= ROW_SIZE(1);
3641                 break;
3642         case 4:
3643                 gb_addr_config |= ROW_SIZE(2);
3644                 break;
3645         }
3646
3647         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3648          * not have bank info, so create a custom tiling dword.
3649          * bits 3:0   num_pipes
3650          * bits 7:4   num_banks
3651          * bits 11:8  group_size
3652          * bits 15:12 row_size
3653          */
3654         rdev->config.cik.tile_config = 0;
3655         switch (rdev->config.cik.num_tile_pipes) {
3656         case 1:
3657                 rdev->config.cik.tile_config |= (0 << 0);
3658                 break;
3659         case 2:
3660                 rdev->config.cik.tile_config |= (1 << 0);
3661                 break;
3662         case 4:
3663                 rdev->config.cik.tile_config |= (2 << 0);
3664                 break;
3665         case 8:
3666         default:
3667                 /* XXX what about 12? */
3668                 rdev->config.cik.tile_config |= (3 << 0);
3669                 break;
3670         }
3671         rdev->config.cik.tile_config |=
3672                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3673         rdev->config.cik.tile_config |=
3674                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3675         rdev->config.cik.tile_config |=
3676                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3677
3678         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3679         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3680         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3681         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3682         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3683         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3684         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3685         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3686
3687         cik_tiling_mode_table_init(rdev);
3688
3689         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3690                      rdev->config.cik.max_sh_per_se,
3691                      rdev->config.cik.max_backends_per_se);
3692
3693         rdev->config.cik.active_cus = 0;
3694         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3695                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3696                         rdev->config.cik.active_cus +=
3697                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3698                 }
3699         }
3700
3701         /* set HW defaults for 3D engine */
3702         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3703
3704         mutex_lock(&rdev->grbm_idx_mutex);
3705         /*
3706          * make sure that the following register writes will be broadcast
3707          * to all the shaders
3708          */
3709         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3710         WREG32(SX_DEBUG_1, 0x20);
3711
3712         WREG32(TA_CNTL_AUX, 0x00010000);
3713
3714         tmp = RREG32(SPI_CONFIG_CNTL);
3715         tmp |= 0x03000000;
3716         WREG32(SPI_CONFIG_CNTL, tmp);
3717
3718         WREG32(SQ_CONFIG, 1);
3719
3720         WREG32(DB_DEBUG, 0);
3721
3722         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3723         tmp |= 0x00000400;
3724         WREG32(DB_DEBUG2, tmp);
3725
3726         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3727         tmp |= 0x00020200;
3728         WREG32(DB_DEBUG3, tmp);
3729
3730         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3731         tmp |= 0x00018208;
3732         WREG32(CB_HW_CONTROL, tmp);
3733
3734         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3735
3736         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3737                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3738                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3739                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3740
3741         WREG32(VGT_NUM_INSTANCES, 1);
3742
3743         WREG32(CP_PERFMON_CNTL, 0);
3744
3745         WREG32(SQ_CONFIG, 0);
3746
3747         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3748                                           FORCE_EOV_MAX_REZ_CNT(255)));
3749
3750         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3751                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3752
3753         WREG32(VGT_GS_VERTEX_REUSE, 16);
3754         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3755
3756         tmp = RREG32(HDP_MISC_CNTL);
3757         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3758         WREG32(HDP_MISC_CNTL, tmp);
3759
3760         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3761         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3762
3763         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3764         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3765         mutex_unlock(&rdev->grbm_idx_mutex);
3766
3767         udelay(50);
3768 }
3769
3770 /*
3771  * GPU scratch register helper functions.
3772  */
3773 /**
3774  * cik_scratch_init - setup driver info for CP scratch regs
3775  *
3776  * @rdev: radeon_device pointer
3777  *
3778  * Set up the number and offset of the CP scratch registers.
3779  * NOTE: use of CP scratch registers is a legacy interface and
3780  * is not used by default on newer asics (r6xx+).  On newer asics,
3781  * memory buffers are used for fences rather than scratch regs.
3782  */
3783 static void cik_scratch_init(struct radeon_device *rdev)
3784 {
3785         int i;
3786
3787         rdev->scratch.num_reg = 7;
3788         rdev->scratch.reg_base = SCRATCH_REG0;
3789         for (i = 0; i < rdev->scratch.num_reg; i++) {
3790                 rdev->scratch.free[i] = true;
3791                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3792         }
3793 }
3794
3795 /**
3796  * cik_ring_test - basic gfx ring test
3797  *
3798  * @rdev: radeon_device pointer
3799  * @ring: radeon_ring structure holding ring information
3800  *
3801  * Allocate a scratch register and write to it using the gfx ring (CIK).
3802  * Provides a basic gfx ring test to verify that the ring is working.
3803  * Used by cik_cp_gfx_resume().
3804  * Returns 0 on success, error on failure.
3805  */
3806 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3807 {
3808         uint32_t scratch;
3809         uint32_t tmp = 0;
3810         unsigned i;
3811         int r;
3812
3813         r = radeon_scratch_get(rdev, &scratch);
3814         if (r) {
3815                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3816                 return r;
3817         }
3818         WREG32(scratch, 0xCAFEDEAD);
3819         r = radeon_ring_lock(rdev, ring, 3);
3820         if (r) {
3821                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3822                 radeon_scratch_free(rdev, scratch);
3823                 return r;
3824         }
3825         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3826         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3827         radeon_ring_write(ring, 0xDEADBEEF);
3828         radeon_ring_unlock_commit(rdev, ring, false);
3829
3830         for (i = 0; i < rdev->usec_timeout; i++) {
3831                 tmp = RREG32(scratch);
3832                 if (tmp == 0xDEADBEEF)
3833                         break;
3834                 DRM_UDELAY(1);
3835         }
3836         if (i < rdev->usec_timeout) {
3837                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3838         } else {
3839                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3840                           ring->idx, scratch, tmp);
3841                 r = -EINVAL;
3842         }
3843         radeon_scratch_free(rdev, scratch);
3844         return r;
3845 }
3846
3847 /**
3848  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3849  *
3850  * @rdev: radeon_device pointer
3851  * @ridx: radeon ring index
3852  *
3853  * Emits an hdp flush on the cp.
3854  */
3855 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3856                                        int ridx)
3857 {
3858         struct radeon_ring *ring = &rdev->ring[ridx];
3859         u32 ref_and_mask;
3860
3861         switch (ring->idx) {
3862         case CAYMAN_RING_TYPE_CP1_INDEX:
3863         case CAYMAN_RING_TYPE_CP2_INDEX:
3864         default:
3865                 switch (ring->me) {
3866                 case 0:
3867                         ref_and_mask = CP2 << ring->pipe;
3868                         break;
3869                 case 1:
3870                         ref_and_mask = CP6 << ring->pipe;
3871                         break;
3872                 default:
3873                         return;
3874                 }
3875                 break;
3876         case RADEON_RING_TYPE_GFX_INDEX:
3877                 ref_and_mask = CP0;
3878                 break;
3879         }
3880
3881         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3882         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3883                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3884                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3885         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3886         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3887         radeon_ring_write(ring, ref_and_mask);
3888         radeon_ring_write(ring, ref_and_mask);
3889         radeon_ring_write(ring, 0x20); /* poll interval */
3890 }
3891
3892 /**
3893  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3894  *
3895  * @rdev: radeon_device pointer
3896  * @fence: radeon fence object
3897  *
3898  * Emits a fence sequence number on the gfx ring and flushes
3899  * GPU caches.
3900  */
3901 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3902                              struct radeon_fence *fence)
3903 {
3904         struct radeon_ring *ring = &rdev->ring[fence->ring];
3905         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3906
3907         /* EVENT_WRITE_EOP - flush caches, send int */
3908         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3909         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3910                                  EOP_TC_ACTION_EN |
3911                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3912                                  EVENT_INDEX(5)));
3913         radeon_ring_write(ring, addr & 0xfffffffc);
3914         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3915         radeon_ring_write(ring, fence->seq);
3916         radeon_ring_write(ring, 0);
3917 }
3918
3919 /**
3920  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3921  *
3922  * @rdev: radeon_device pointer
3923  * @fence: radeon fence object
3924  *
3925  * Emits a fence sequence number on the compute ring and flushes
3926  * GPU caches.
3927  */
3928 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3929                                  struct radeon_fence *fence)
3930 {
3931         struct radeon_ring *ring = &rdev->ring[fence->ring];
3932         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3933
3934         /* RELEASE_MEM - flush caches, send int */
3935         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3936         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3937                                  EOP_TC_ACTION_EN |
3938                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3939                                  EVENT_INDEX(5)));
3940         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3941         radeon_ring_write(ring, addr & 0xfffffffc);
3942         radeon_ring_write(ring, upper_32_bits(addr));
3943         radeon_ring_write(ring, fence->seq);
3944         radeon_ring_write(ring, 0);
3945 }
3946
3947 /**
3948  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3949  *
3950  * @rdev: radeon_device pointer
3951  * @ring: radeon ring buffer object
3952  * @semaphore: radeon semaphore object
3953  * @emit_wait: Is this a semaphore wait?
3954  *
3955  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3956  * from running ahead of semaphore waits.
3957  */
3958 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3959                              struct radeon_ring *ring,
3960                              struct radeon_semaphore *semaphore,
3961                              bool emit_wait)
3962 {
3963         uint64_t addr = semaphore->gpu_addr;
3964         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3965
3966         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3967         radeon_ring_write(ring, lower_32_bits(addr));
3968         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3969
3970         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3971                 /* Prevent the PFP from running ahead of the semaphore wait */
3972                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3973                 radeon_ring_write(ring, 0x0);
3974         }
3975
3976         return true;
3977 }
3978
3979 /**
3980  * cik_copy_cpdma - copy pages using the CP DMA engine
3981  *
3982  * @rdev: radeon_device pointer
3983  * @src_offset: src GPU address
3984  * @dst_offset: dst GPU address
3985  * @num_gpu_pages: number of GPU pages to xfer
3986  * @resv: reservation object to sync to
3987  *
3988  * Copy GPU pages using the CP DMA engine (CIK+).
3989  * Used by the radeon ttm implementation to move pages if
3990  * registered as the asic copy callback.
3991  */
3992 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3993                                     uint64_t src_offset, uint64_t dst_offset,
3994                                     unsigned num_gpu_pages,
3995                                     struct reservation_object *resv)
3996 {
3997         struct radeon_semaphore *sem = NULL;
3998         struct radeon_fence *fence;
3999         int ring_index = rdev->asic->copy.blit_ring_index;
4000         struct radeon_ring *ring = &rdev->ring[ring_index];
4001         u32 size_in_bytes, cur_size_in_bytes, control;
4002         int i, num_loops;
4003         int r = 0;
4004
4005         r = radeon_semaphore_create(rdev, &sem);
4006         if (r) {
4007                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4008                 return ERR_PTR(r);
4009         }
4010
4011         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4012         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4013         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4014         if (r) {
4015                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4016                 radeon_semaphore_free(rdev, &sem, NULL);
4017                 return ERR_PTR(r);
4018         }
4019
4020         radeon_semaphore_sync_resv(rdev, sem, resv, false);
4021         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
4022
4023         for (i = 0; i < num_loops; i++) {
4024                 cur_size_in_bytes = size_in_bytes;
4025                 if (cur_size_in_bytes > 0x1fffff)
4026                         cur_size_in_bytes = 0x1fffff;
4027                 size_in_bytes -= cur_size_in_bytes;
4028                 control = 0;
4029                 if (size_in_bytes == 0)
4030                         control |= PACKET3_DMA_DATA_CP_SYNC;
4031                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4032                 radeon_ring_write(ring, control);
4033                 radeon_ring_write(ring, lower_32_bits(src_offset));
4034                 radeon_ring_write(ring, upper_32_bits(src_offset));
4035                 radeon_ring_write(ring, lower_32_bits(dst_offset));
4036                 radeon_ring_write(ring, upper_32_bits(dst_offset));
4037                 radeon_ring_write(ring, cur_size_in_bytes);
4038                 src_offset += cur_size_in_bytes;
4039                 dst_offset += cur_size_in_bytes;
4040         }
4041
4042         r = radeon_fence_emit(rdev, &fence, ring->idx);
4043         if (r) {
4044                 radeon_ring_unlock_undo(rdev, ring);
4045                 radeon_semaphore_free(rdev, &sem, NULL);
4046                 return ERR_PTR(r);
4047         }
4048
4049         radeon_ring_unlock_commit(rdev, ring, false);
4050         radeon_semaphore_free(rdev, &sem, fence);
4051
4052         return fence;
4053 }
4054
4055 /*
4056  * IB stuff
4057  */
4058 /**
4059  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4060  *
4061  * @rdev: radeon_device pointer
4062  * @ib: radeon indirect buffer object
4063  *
4064  * Emits a DE (drawing engine) or CE (constant engine) IB
4065  * on the gfx ring.  IBs are usually generated by userspace
4066  * acceleration drivers and submitted to the kernel for
4067  * scheduling on the ring.  This function schedules the IB
4068  * on the gfx ring for execution by the GPU.
4069  */
4070 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4071 {
4072         struct radeon_ring *ring = &rdev->ring[ib->ring];
4073         u32 header, control = INDIRECT_BUFFER_VALID;
4074
4075         if (ib->is_const_ib) {
4076                 /* set switch buffer packet before const IB */
4077                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4078                 radeon_ring_write(ring, 0);
4079
4080                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4081         } else {
4082                 u32 next_rptr;
4083                 if (ring->rptr_save_reg) {
4084                         next_rptr = ring->wptr + 3 + 4;
4085                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4086                         radeon_ring_write(ring, ((ring->rptr_save_reg -
4087                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
4088                         radeon_ring_write(ring, next_rptr);
4089                 } else if (rdev->wb.enabled) {
4090                         next_rptr = ring->wptr + 5 + 4;
4091                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4092                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4093                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4094                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4095                         radeon_ring_write(ring, next_rptr);
4096                 }
4097
4098                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4099         }
4100
4101         control |= ib->length_dw |
4102                 (ib->vm ? (ib->vm->id << 24) : 0);
4103
4104         radeon_ring_write(ring, header);
4105         radeon_ring_write(ring,
4106 #ifdef __BIG_ENDIAN
4107                           (2 << 0) |
4108 #endif
4109                           (ib->gpu_addr & 0xFFFFFFFC));
4110         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4111         radeon_ring_write(ring, control);
4112 }
4113
4114 /**
4115  * cik_ib_test - basic gfx ring IB test
4116  *
4117  * @rdev: radeon_device pointer
4118  * @ring: radeon_ring structure holding ring information
4119  *
4120  * Allocate an IB and execute it on the gfx ring (CIK).
4121  * Provides a basic gfx ring test to verify that IBs are working.
4122  * Returns 0 on success, error on failure.
4123  */
4124 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4125 {
4126         struct radeon_ib ib;
4127         uint32_t scratch;
4128         uint32_t tmp = 0;
4129         unsigned i;
4130         int r;
4131
4132         r = radeon_scratch_get(rdev, &scratch);
4133         if (r) {
4134                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4135                 return r;
4136         }
4137         WREG32(scratch, 0xCAFEDEAD);
4138         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4139         if (r) {
4140                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4141                 radeon_scratch_free(rdev, scratch);
4142                 return r;
4143         }
4144         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4145         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4146         ib.ptr[2] = 0xDEADBEEF;
4147         ib.length_dw = 3;
4148         r = radeon_ib_schedule(rdev, &ib, NULL, false);
4149         if (r) {
4150                 radeon_scratch_free(rdev, scratch);
4151                 radeon_ib_free(rdev, &ib);
4152                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4153                 return r;
4154         }
4155         r = radeon_fence_wait(ib.fence, false);
4156         if (r) {
4157                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4158                 radeon_scratch_free(rdev, scratch);
4159                 radeon_ib_free(rdev, &ib);
4160                 return r;
4161         }
4162         for (i = 0; i < rdev->usec_timeout; i++) {
4163                 tmp = RREG32(scratch);
4164                 if (tmp == 0xDEADBEEF)
4165                         break;
4166                 DRM_UDELAY(1);
4167         }
4168         if (i < rdev->usec_timeout) {
4169                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4170         } else {
4171                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4172                           scratch, tmp);
4173                 r = -EINVAL;
4174         }
4175         radeon_scratch_free(rdev, scratch);
4176         radeon_ib_free(rdev, &ib);
4177         return r;
4178 }
4179
4180 /*
4181  * CP.
4182  * On CIK, gfx and compute now have independent command processors.
4183  *
4184  * GFX
4185  * Gfx consists of a single ring and can process both gfx jobs and
4186  * compute jobs.  The gfx CP consists of three microengines (ME):
4187  * PFP - Pre-Fetch Parser
4188  * ME - Micro Engine
4189  * CE - Constant Engine
4190  * The PFP and ME make up what is considered the Drawing Engine (DE).
4191  * The CE is an asynchronous engine used for updating buffer descriptors
4192  * used by the DE so that they can be loaded into cache in parallel
4193  * while the DE is processing state update packets.
4194  *
4195  * Compute
4196  * The compute CP consists of two microengines (ME):
4197  * MEC1 - Compute MicroEngine 1
4198  * MEC2 - Compute MicroEngine 2
4199  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4200  * The queues are exposed to userspace and are programmed directly
4201  * by the compute runtime.
4202  */
4203 /**
4204  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4205  *
4206  * @rdev: radeon_device pointer
4207  * @enable: enable or disable the MEs
4208  *
4209  * Halts or unhalts the gfx MEs.
4210  */
4211 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4212 {
4213         if (enable)
4214                 WREG32(CP_ME_CNTL, 0);
4215         else {
4216                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4217                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4218                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4219                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4220         }
4221         udelay(50);
4222 }
4223
4224 /**
4225  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4226  *
4227  * @rdev: radeon_device pointer
4228  *
4229  * Loads the gfx PFP, ME, and CE ucode.
4230  * Returns 0 for success, -EINVAL if the ucode is not available.
4231  */
4232 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4233 {
4234         int i;
4235
4236         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4237                 return -EINVAL;
4238
4239         cik_cp_gfx_enable(rdev, false);
4240
4241         if (rdev->new_fw) {
4242                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4243                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4244                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4245                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4246                 const struct gfx_firmware_header_v1_0 *me_hdr =
4247                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4248                 const __le32 *fw_data;
4249                 u32 fw_size;
4250
4251                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4252                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4253                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4254
4255                 /* PFP */
4256                 fw_data = (const __le32 *)
4257                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4258                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4259                 WREG32(CP_PFP_UCODE_ADDR, 0);
4260                 for (i = 0; i < fw_size; i++)
4261                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4262                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4263
4264                 /* CE */
4265                 fw_data = (const __le32 *)
4266                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4267                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4268                 WREG32(CP_CE_UCODE_ADDR, 0);
4269                 for (i = 0; i < fw_size; i++)
4270                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4271                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4272
4273                 /* ME */
4274                 fw_data = (const __le32 *)
4275                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4276                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4277                 WREG32(CP_ME_RAM_WADDR, 0);
4278                 for (i = 0; i < fw_size; i++)
4279                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4280                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4281                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4282         } else {
4283                 const __be32 *fw_data;
4284
4285                 /* PFP */
4286                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4287                 WREG32(CP_PFP_UCODE_ADDR, 0);
4288                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4289                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4290                 WREG32(CP_PFP_UCODE_ADDR, 0);
4291
4292                 /* CE */
4293                 fw_data = (const __be32 *)rdev->ce_fw->data;
4294                 WREG32(CP_CE_UCODE_ADDR, 0);
4295                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4296                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4297                 WREG32(CP_CE_UCODE_ADDR, 0);
4298
4299                 /* ME */
4300                 fw_data = (const __be32 *)rdev->me_fw->data;
4301                 WREG32(CP_ME_RAM_WADDR, 0);
4302                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4303                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4304                 WREG32(CP_ME_RAM_WADDR, 0);
4305         }
4306
4307         return 0;
4308 }
4309
4310 /**
4311  * cik_cp_gfx_start - start the gfx ring
4312  *
4313  * @rdev: radeon_device pointer
4314  *
4315  * Enables the ring and loads the clear state context and other
4316  * packets required to init the ring.
4317  * Returns 0 for success, error for failure.
4318  */
4319 static int cik_cp_gfx_start(struct radeon_device *rdev)
4320 {
4321         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4322         int r, i;
4323
4324         /* init the CP */
4325         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4326         WREG32(CP_ENDIAN_SWAP, 0);
4327         WREG32(CP_DEVICE_ID, 1);
4328
4329         cik_cp_gfx_enable(rdev, true);
4330
4331         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4332         if (r) {
4333                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4334                 return r;
4335         }
4336
4337         /* init the CE partitions.  CE only used for gfx on CIK */
4338         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4339         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4340         radeon_ring_write(ring, 0xc000);
4341         radeon_ring_write(ring, 0xc000);
4342
4343         /* setup clear context state */
4344         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4345         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4346
4347         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4348         radeon_ring_write(ring, 0x80000000);
4349         radeon_ring_write(ring, 0x80000000);
4350
4351         for (i = 0; i < cik_default_size; i++)
4352                 radeon_ring_write(ring, cik_default_state[i]);
4353
4354         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4355         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4356
4357         /* set clear context state */
4358         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4359         radeon_ring_write(ring, 0);
4360
4361         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4362         radeon_ring_write(ring, 0x00000316);
4363         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4364         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4365
4366         radeon_ring_unlock_commit(rdev, ring, false);
4367
4368         return 0;
4369 }
4370
4371 /**
4372  * cik_cp_gfx_fini - stop the gfx ring
4373  *
4374  * @rdev: radeon_device pointer
4375  *
4376  * Stop the gfx ring and tear down the driver ring
4377  * info.
4378  */
4379 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4380 {
4381         cik_cp_gfx_enable(rdev, false);
4382         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4383 }
4384
4385 /**
4386  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4387  *
4388  * @rdev: radeon_device pointer
4389  *
4390  * Program the location and size of the gfx ring buffer
4391  * and test it to make sure it's working.
4392  * Returns 0 for success, error for failure.
4393  */
4394 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4395 {
4396         struct radeon_ring *ring;
4397         u32 tmp;
4398         u32 rb_bufsz;
4399         u64 rb_addr;
4400         int r;
4401
4402         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4403         if (rdev->family != CHIP_HAWAII)
4404                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4405
4406         /* Set the write pointer delay */
4407         WREG32(CP_RB_WPTR_DELAY, 0);
4408
4409         /* set the RB to use vmid 0 */
4410         WREG32(CP_RB_VMID, 0);
4411
4412         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4413
4414         /* ring 0 - compute and gfx */
4415         /* Set ring buffer size */
4416         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4417         rb_bufsz = order_base_2(ring->ring_size / 8);
4418         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4419 #ifdef __BIG_ENDIAN
4420         tmp |= BUF_SWAP_32BIT;
4421 #endif
4422         WREG32(CP_RB0_CNTL, tmp);
4423
4424         /* Initialize the ring buffer's read and write pointers */
4425         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4426         ring->wptr = 0;
4427         WREG32(CP_RB0_WPTR, ring->wptr);
4428
4429         /* set the wb address whether it's enabled or not */
4430         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4431         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4432
4433         /* scratch register shadowing is no longer supported */
4434         WREG32(SCRATCH_UMSK, 0);
4435
4436         if (!rdev->wb.enabled)
4437                 tmp |= RB_NO_UPDATE;
4438
4439         mdelay(1);
4440         WREG32(CP_RB0_CNTL, tmp);
4441
4442         rb_addr = ring->gpu_addr >> 8;
4443         WREG32(CP_RB0_BASE, rb_addr);
4444         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4445
4446         /* start the ring */
4447         cik_cp_gfx_start(rdev);
4448         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4449         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4450         if (r) {
4451                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4452                 return r;
4453         }
4454
4455         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4456                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4457
4458         return 0;
4459 }
4460
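/**
 * cik_gfx_get_rptr - get the current read pointer of the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Returns the current read pointer for the gfx ring, taken from
 * the writeback buffer if writeback is enabled, otherwise read
 * directly from the CP_RB0_RPTR register (CIK).
 */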
4461 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4462                      struct radeon_ring *ring)
4463 {
4464         u32 rptr;
4465
4466         if (rdev->wb.enabled)
4467                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4468         else
4469                 rptr = RREG32(CP_RB0_RPTR);
4470
4471         return rptr;
4472 }
4473
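/**
 * cik_gfx_get_wptr - get the current write pointer of the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Returns the current write pointer for the gfx ring, read from
 * the CP_RB0_WPTR register (CIK).
 */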
4474 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4475                      struct radeon_ring *ring)
4476 {
4477         u32 wptr;
4478
4479         wptr = RREG32(CP_RB0_WPTR);
4480
4481         return wptr;
4482 }
4483
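/**
 * cik_gfx_set_wptr - commit the write pointer of the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Writes ring->wptr to the CP_RB0_WPTR register and reads the
 * register back to make sure the write is posted (CIK).
 */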
4484 void cik_gfx_set_wptr(struct radeon_device *rdev,
4485                       struct radeon_ring *ring)
4486 {
4487         WREG32(CP_RB0_WPTR, ring->wptr);
4488         (void)RREG32(CP_RB0_WPTR);
4489 }
4490
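/**
 * cik_compute_get_rptr - get the current read pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Returns the current read pointer for the compute ring, taken from
 * the writeback buffer if writeback is enabled, otherwise read from
 * the queue's CP_HQD_PQ_RPTR register via SRBM indexing (CIK).
 */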
4491 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4492                          struct radeon_ring *ring)
4493 {
4494         u32 rptr;
4495
4496         if (rdev->wb.enabled) {
4497                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4498         } else {
4499                 mutex_lock(&rdev->srbm_mutex);
4500                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4501                 rptr = RREG32(CP_HQD_PQ_RPTR);
4502                 cik_srbm_select(rdev, 0, 0, 0, 0);
4503                 mutex_unlock(&rdev->srbm_mutex);
4504         }
4505
4506         return rptr;
4507 }
4508
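/**
 * cik_compute_get_wptr - get the current write pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Returns the current write pointer for the compute ring, taken from
 * the writeback buffer if writeback is enabled, otherwise read from
 * the queue's CP_HQD_PQ_WPTR register via SRBM indexing (CIK).
 */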
4509 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4510                          struct radeon_ring *ring)
4511 {
4512         u32 wptr;
4513
4514         if (rdev->wb.enabled) {
4515                 /* XXX check if swapping is necessary on BE */
4516                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4517         } else {
4518                 mutex_lock(&rdev->srbm_mutex);
4519                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4520                 wptr = RREG32(CP_HQD_PQ_WPTR);
4521                 cik_srbm_select(rdev, 0, 0, 0, 0);
4522                 mutex_unlock(&rdev->srbm_mutex);
4523         }
4524
4525         return wptr;
4526 }
4527
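/**
 * cik_compute_set_wptr - commit the write pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Updates the write pointer in the writeback buffer and rings the
 * queue's doorbell so the CP picks up the new work (CIK).
 */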
4528 void cik_compute_set_wptr(struct radeon_device *rdev,
4529                           struct radeon_ring *ring)
4530 {
4531         /* XXX check if swapping is necessary on BE */
4532         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4533         WDOORBELL32(ring->doorbell_index, ring->wptr);
4534 }
4535
4536 /**
4537  * cik_cp_compute_enable - enable/disable the compute CP MEs
4538  *
4539  * @rdev: radeon_device pointer
4540  * @enable: enable or disable the MEs
4541  *
4542  * Halts or unhalts the compute MEs.
4543  */
4544 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4545 {
4546         if (enable)
4547                 WREG32(CP_MEC_CNTL, 0);
4548         else {
4549                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4550                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4551                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4552         }
4553         udelay(50);
4554 }
4555
4556 /**
4557  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4558  *
4559  * @rdev: radeon_device pointer
4560  *
4561  * Loads the compute MEC1&2 ucode.
4562  * Returns 0 for success, -EINVAL if the ucode is not available.
4563  */
4564 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4565 {
4566         int i;
4567
4568         if (!rdev->mec_fw)
4569                 return -EINVAL;
4570
4571         cik_cp_compute_enable(rdev, false);
4572
4573         if (rdev->new_fw) {
4574                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4575                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4576                 const __le32 *fw_data;
4577                 u32 fw_size;
4578
4579                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4580
4581                 /* MEC1 */
4582                 fw_data = (const __le32 *)
4583                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4584                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4585                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4586                 for (i = 0; i < fw_size; i++)
4587                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4588                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4589
4590                 /* MEC2 */
4591                 if (rdev->family == CHIP_KAVERI) {
4592                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4593                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4594
4595                         fw_data = (const __le32 *)
4596                                 (rdev->mec2_fw->data +
4597                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4598                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4599                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4600                         for (i = 0; i < fw_size; i++)
4601                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4602                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4603                 }
4604         } else {
4605                 const __be32 *fw_data;
4606
4607                 /* MEC1 */
4608                 fw_data = (const __be32 *)rdev->mec_fw->data;
4609                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4610                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4611                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4612                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4613
4614                 if (rdev->family == CHIP_KAVERI) {
4615                         /* MEC2 */
4616                         fw_data = (const __be32 *)rdev->mec_fw->data;
4617                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4618                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4619                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4620                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4621                 }
4622         }
4623
4624         return 0;
4625 }
4626
4627 /**
4628  * cik_cp_compute_start - start the compute queues
4629  *
4630  * @rdev: radeon_device pointer
4631  *
4632  * Enable the compute queues.
4633  * Returns 0 for success, error for failure.
4634  */
4635 static int cik_cp_compute_start(struct radeon_device *rdev)
4636 {
4637         cik_cp_compute_enable(rdev, true);
4638
4639         return 0;
4640 }
4641
4642 /**
4643  * cik_cp_compute_fini - stop the compute queues
4644  *
4645  * @rdev: radeon_device pointer
4646  *
4647  * Stop the compute queues and tear down the driver queue
4648  * info.
4649  */
4650 static void cik_cp_compute_fini(struct radeon_device *rdev)
4651 {
4652         int i, idx, r;
4653
4654         cik_cp_compute_enable(rdev, false);
4655
4656         for (i = 0; i < 2; i++) {
4657                 if (i == 0)
4658                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4659                 else
4660                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4661
4662                 if (rdev->ring[idx].mqd_obj) {
4663                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4664                         if (unlikely(r != 0))
4665                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4666
4667                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4668                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4669
4670                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4671                         rdev->ring[idx].mqd_obj = NULL;
4672                 }
4673         }
4674 }
4675
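/**
 * cik_mec_fini - tear down the compute MEC HPD EOP buffer
 *
 * @rdev: radeon_device pointer
 *
 * Unpins and frees the HPD EOP buffer object used by the
 * compute queues (CIK).
 */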
4676 static void cik_mec_fini(struct radeon_device *rdev)
4677 {
4678         int r;
4679
4680         if (rdev->mec.hpd_eop_obj) {
4681                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4682                 if (unlikely(r != 0))
4683                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4684                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4685                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4686
4687                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4688                 rdev->mec.hpd_eop_obj = NULL;
4689         }
4690 }
4691
4692 #define MEC_HPD_SIZE 2048
4693
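/**
 * cik_mec_init - allocate the compute MEC HPD EOP buffer
 *
 * @rdev: radeon_device pointer
 *
 * Sets up the number of MECs, pipes and queues managed by the driver
 * and allocates, pins and clears the HPD EOP buffer object used by
 * the compute queues (CIK).
 * Returns 0 for success, error for failure.
 */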
4694 static int cik_mec_init(struct radeon_device *rdev)
4695 {
4696         int r;
4697         u32 *hpd;
4698
4699         /*
4700          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4701          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4702          * Nonetheless, we assign only 1 pipe because all other pipes will
4703          * be handled by KFD
4704          */
4705         rdev->mec.num_mec = 1;
4706         rdev->mec.num_pipe = 1;
4707         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4708
4709         if (rdev->mec.hpd_eop_obj == NULL) {
4710                 r = radeon_bo_create(rdev,
4711                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4712                                      PAGE_SIZE, true,
4713                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4714                                      &rdev->mec.hpd_eop_obj);
4715                 if (r) {
4716                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4717                         return r;
4718                 }
4719         }
4720
4721         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4722         if (unlikely(r != 0)) {
4723                 cik_mec_fini(rdev);
4724                 return r;
4725         }
4726         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4727                           &rdev->mec.hpd_eop_gpu_addr);
4728         if (r) {
4729                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4730                 cik_mec_fini(rdev);
4731                 return r;
4732         }
4733         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4734         if (r) {
4735                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4736                 cik_mec_fini(rdev);
4737                 return r;
4738         }
4739
4740         /* clear memory.  Not sure if this is required or not */
4741         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4742
4743         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4744         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4745
4746         return 0;
4747 }
4748
4749 struct hqd_registers
4750 {
4751         u32 cp_mqd_base_addr;
4752         u32 cp_mqd_base_addr_hi;
4753         u32 cp_hqd_active;
4754         u32 cp_hqd_vmid;
4755         u32 cp_hqd_persistent_state;
4756         u32 cp_hqd_pipe_priority;
4757         u32 cp_hqd_queue_priority;
4758         u32 cp_hqd_quantum;
4759         u32 cp_hqd_pq_base;
4760         u32 cp_hqd_pq_base_hi;
4761         u32 cp_hqd_pq_rptr;
4762         u32 cp_hqd_pq_rptr_report_addr;
4763         u32 cp_hqd_pq_rptr_report_addr_hi;
4764         u32 cp_hqd_pq_wptr_poll_addr;
4765         u32 cp_hqd_pq_wptr_poll_addr_hi;
4766         u32 cp_hqd_pq_doorbell_control;
4767         u32 cp_hqd_pq_wptr;
4768         u32 cp_hqd_pq_control;
4769         u32 cp_hqd_ib_base_addr;
4770         u32 cp_hqd_ib_base_addr_hi;
4771         u32 cp_hqd_ib_rptr;
4772         u32 cp_hqd_ib_control;
4773         u32 cp_hqd_iq_timer;
4774         u32 cp_hqd_iq_rptr;
4775         u32 cp_hqd_dequeue_request;
4776         u32 cp_hqd_dma_offload;
4777         u32 cp_hqd_sema_cmd;
4778         u32 cp_hqd_msg_type;
4779         u32 cp_hqd_atomic0_preop_lo;
4780         u32 cp_hqd_atomic0_preop_hi;
4781         u32 cp_hqd_atomic1_preop_lo;
4782         u32 cp_hqd_atomic1_preop_hi;
4783         u32 cp_hqd_hq_scheduler0;
4784         u32 cp_hqd_hq_scheduler1;
4785         u32 cp_mqd_control;
4786 };
4787
4788 struct bonaire_mqd
4789 {
4790         u32 header;
4791         u32 dispatch_initiator;
4792         u32 dimensions[3];
4793         u32 start_idx[3];
4794         u32 num_threads[3];
4795         u32 pipeline_stat_enable;
4796         u32 perf_counter_enable;
4797         u32 pgm[2];
4798         u32 tba[2];
4799         u32 tma[2];
4800         u32 pgm_rsrc[2];
4801         u32 vmid;
4802         u32 resource_limits;
4803         u32 static_thread_mgmt01[2];
4804         u32 tmp_ring_size;
4805         u32 static_thread_mgmt23[2];
4806         u32 restart[3];
4807         u32 thread_trace_enable;
4808         u32 reserved1;
4809         u32 user_data[16];
4810         u32 vgtcs_invoke_count[2];
4811         struct hqd_registers queue_state;
4812         u32 dequeue_cntr;
4813         u32 interrupt_queue[64];
4814 };
4815
4816 /**
4817  * cik_cp_compute_resume - setup the compute queue registers
4818  *
4819  * @rdev: radeon_device pointer
4820  *
4821  * Program the compute queues and test them to make sure they
4822  * are working.
4823  * Returns 0 for success, error for failure.
4824  */
4825 static int cik_cp_compute_resume(struct radeon_device *rdev)
4826 {
4827         int r, i, j, idx;
4828         u32 tmp;
4829         bool use_doorbell = true;
4830         u64 hqd_gpu_addr;
4831         u64 mqd_gpu_addr;
4832         u64 eop_gpu_addr;
4833         u64 wb_gpu_addr;
4834         u32 *buf;
4835         struct bonaire_mqd *mqd;
4836
4837         r = cik_cp_compute_start(rdev);
4838         if (r)
4839                 return r;
4840
4841         /* fix up chicken bits */
4842         tmp = RREG32(CP_CPF_DEBUG);
4843         tmp |= (1 << 23);
4844         WREG32(CP_CPF_DEBUG, tmp);
4845
4846         /* init the pipes */
4847         mutex_lock(&rdev->srbm_mutex);
4848
4849         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4850
4851         cik_srbm_select(rdev, 0, 0, 0, 0);
4852
4853         /* write the EOP addr */
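             /* the base is programmed in 256-byte units, hence the >> 8 below */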
4854         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4855         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4856
4857         /* set the VMID assigned */
4858         WREG32(CP_HPD_EOP_VMID, 0);
4859
4860         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
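             /* MEC_HPD_SIZE is 2048 bytes = 512 dwords; order_base_2(2048 / 8) = 8 encodes 2^(8+1) = 512 dwords */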
4861         tmp = RREG32(CP_HPD_EOP_CONTROL);
4862         tmp &= ~EOP_SIZE_MASK;
4863         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4864         WREG32(CP_HPD_EOP_CONTROL, tmp);
4865
4866         mutex_unlock(&rdev->srbm_mutex);
4867
4868         /* init the queues.  Just two for now. */
4869         for (i = 0; i < 2; i++) {
4870                 if (i == 0)
4871                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4872                 else
4873                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4874
4875                 if (rdev->ring[idx].mqd_obj == NULL) {
4876                         r = radeon_bo_create(rdev,
4877                                              sizeof(struct bonaire_mqd),
4878                                              PAGE_SIZE, true,
4879                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4880                                              NULL, &rdev->ring[idx].mqd_obj);
4881                         if (r) {
4882                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4883                                 return r;
4884                         }
4885                 }
4886
4887                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4888                 if (unlikely(r != 0)) {
4889                         cik_cp_compute_fini(rdev);
4890                         return r;
4891                 }
4892                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4893                                   &mqd_gpu_addr);
4894                 if (r) {
4895                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4896                         cik_cp_compute_fini(rdev);
4897                         return r;
4898                 }
4899                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4900                 if (r) {
4901                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4902                         cik_cp_compute_fini(rdev);
4903                         return r;
4904                 }
4905
4906                 /* init the mqd struct */
4907                 memset(buf, 0, sizeof(struct bonaire_mqd));
4908
4909                 mqd = (struct bonaire_mqd *)buf;
4910                 mqd->header = 0xC0310800;
4911                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4912                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4913                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4914                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4915
4916                 mutex_lock(&rdev->srbm_mutex);
4917                 cik_srbm_select(rdev, rdev->ring[idx].me,
4918                                 rdev->ring[idx].pipe,
4919                                 rdev->ring[idx].queue, 0);
4920
4921                 /* disable wptr polling */
4922                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4923                 tmp &= ~WPTR_POLL_EN;
4924                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4925
4926                 /* enable doorbell? */
4927                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4928                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4929                 if (use_doorbell)
4930                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4931                 else
4932                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4933                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4934                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4935
4936                 /* disable the queue if it's active */
4937                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4938                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4939                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4940                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4941                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4942                         for (j = 0; j < rdev->usec_timeout; j++) {
4943                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4944                                         break;
4945                                 udelay(1);
4946                         }
4947                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4948                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4949                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4950                 }
4951
4952                 /* set the pointer to the MQD */
4953                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4954                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4955                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4956                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4957                 /* set MQD vmid to 0 */
4958                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4959                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4960                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4961
4962                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4963                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4964                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4965                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4966                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4967                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4968
4969                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4970                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4971                 mqd->queue_state.cp_hqd_pq_control &=
4972                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4973
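                     /* QUEUE_SIZE presumably uses the same 2^(v+1)-dword encoding as EOP_SIZE above, hence order_base_2(ring_size / 8) */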
4974                 mqd->queue_state.cp_hqd_pq_control |=
4975                         order_base_2(rdev->ring[idx].ring_size / 8);
4976                 mqd->queue_state.cp_hqd_pq_control |=
4977                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4978 #ifdef __BIG_ENDIAN
4979                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4980 #endif
4981                 mqd->queue_state.cp_hqd_pq_control &=
4982                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4983                 mqd->queue_state.cp_hqd_pq_control |=
4984                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4985                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4986
4987                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4988                 if (i == 0)
4989                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4990                 else
4991                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4992                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4993                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4994                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4995                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4996                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4997
4998                 /* set the wb address whether it's enabled or not */
4999                 if (i == 0)
5000                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5001                 else
5002                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5003                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5004                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5005                         upper_32_bits(wb_gpu_addr) & 0xffff;
5006                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5007                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5008                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5009                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5010
5011                 /* enable the doorbell if requested */
5012                 if (use_doorbell) {
5013                         mqd->queue_state.cp_hqd_pq_doorbell_control =
5014                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5015                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5016                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
5017                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5018                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5019                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5020                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5021
5022                 } else {
5023                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5024                 }
5025                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5026                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5027
5028                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5029                 rdev->ring[idx].wptr = 0;
5030                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5031                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5032                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5033
5034                 /* set the vmid for the queue */
5035                 mqd->queue_state.cp_hqd_vmid = 0;
5036                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5037
5038                 /* activate the queue */
5039                 mqd->queue_state.cp_hqd_active = 1;
5040                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5041
5042                 cik_srbm_select(rdev, 0, 0, 0, 0);
5043                 mutex_unlock(&rdev->srbm_mutex);
5044
5045                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5046                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5047
5048                 rdev->ring[idx].ready = true;
5049                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5050                 if (r)
5051                         rdev->ring[idx].ready = false;
5052         }
5053
5054         return 0;
5055 }
5056
5057 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5058 {
5059         cik_cp_gfx_enable(rdev, enable);
5060         cik_cp_compute_enable(rdev, enable);
5061 }
5062
5063 static int cik_cp_load_microcode(struct radeon_device *rdev)
5064 {
5065         int r;
5066
5067         r = cik_cp_gfx_load_microcode(rdev);
5068         if (r)
5069                 return r;
5070         r = cik_cp_compute_load_microcode(rdev);
5071         if (r)
5072                 return r;
5073
5074         return 0;
5075 }
5076
5077 static void cik_cp_fini(struct radeon_device *rdev)
5078 {
5079         cik_cp_gfx_fini(rdev);
5080         cik_cp_compute_fini(rdev);
5081 }
5082
5083 static int cik_cp_resume(struct radeon_device *rdev)
5084 {
5085         int r;
5086
5087         cik_enable_gui_idle_interrupt(rdev, false);
5088
5089         r = cik_cp_load_microcode(rdev);
5090         if (r)
5091                 return r;
5092
5093         r = cik_cp_gfx_resume(rdev);
5094         if (r)
5095                 return r;
5096         r = cik_cp_compute_resume(rdev);
5097         if (r)
5098                 return r;
5099
5100         cik_enable_gui_idle_interrupt(rdev, true);
5101
5102         return 0;
5103 }
5104
5105 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5106 {
5107         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5108                 RREG32(GRBM_STATUS));
5109         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5110                 RREG32(GRBM_STATUS2));
5111         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5112                 RREG32(GRBM_STATUS_SE0));
5113         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5114                 RREG32(GRBM_STATUS_SE1));
5115         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5116                 RREG32(GRBM_STATUS_SE2));
5117         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5118                 RREG32(GRBM_STATUS_SE3));
5119         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5120                 RREG32(SRBM_STATUS));
5121         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5122                 RREG32(SRBM_STATUS2));
5123         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5124                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5125         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5126                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5127         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5128         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5129                  RREG32(CP_STALLED_STAT1));
5130         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5131                  RREG32(CP_STALLED_STAT2));
5132         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5133                  RREG32(CP_STALLED_STAT3));
5134         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5135                  RREG32(CP_CPF_BUSY_STAT));
5136         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5137                  RREG32(CP_CPF_STALLED_STAT1));
5138         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5139         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5140         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5141                  RREG32(CP_CPC_STALLED_STAT1));
5142         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5143 }
5144
5145 /**
5146  * cik_gpu_check_soft_reset - check which blocks are busy
5147  *
5148  * @rdev: radeon_device pointer
5149  *
5150  * Check which blocks are busy and return the relevant reset
5151  * mask to be used by cik_gpu_soft_reset().
5152  * Returns a mask of the blocks to be reset.
5153  */
5154 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5155 {
5156         u32 reset_mask = 0;
5157         u32 tmp;
5158
5159         /* GRBM_STATUS */
5160         tmp = RREG32(GRBM_STATUS);
5161         if (tmp & (PA_BUSY | SC_BUSY |
5162                    BCI_BUSY | SX_BUSY |
5163                    TA_BUSY | VGT_BUSY |
5164                    DB_BUSY | CB_BUSY |
5165                    GDS_BUSY | SPI_BUSY |
5166                    IA_BUSY | IA_BUSY_NO_DMA))
5167                 reset_mask |= RADEON_RESET_GFX;
5168
5169         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5170                 reset_mask |= RADEON_RESET_CP;
5171
5172         /* GRBM_STATUS2 */
5173         tmp = RREG32(GRBM_STATUS2);
5174         if (tmp & RLC_BUSY)
5175                 reset_mask |= RADEON_RESET_RLC;
5176
5177         /* SDMA0_STATUS_REG */
5178         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5179         if (!(tmp & SDMA_IDLE))
5180                 reset_mask |= RADEON_RESET_DMA;
5181
5182         /* SDMA1_STATUS_REG */
5183         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5184         if (!(tmp & SDMA_IDLE))
5185                 reset_mask |= RADEON_RESET_DMA1;
5186
5187         /* SRBM_STATUS2 */
5188         tmp = RREG32(SRBM_STATUS2);
5189         if (tmp & SDMA_BUSY)
5190                 reset_mask |= RADEON_RESET_DMA;
5191
5192         if (tmp & SDMA1_BUSY)
5193                 reset_mask |= RADEON_RESET_DMA1;
5194
5195         /* SRBM_STATUS */
5196         tmp = RREG32(SRBM_STATUS);
5197
5198         if (tmp & IH_BUSY)
5199                 reset_mask |= RADEON_RESET_IH;
5200
5201         if (tmp & SEM_BUSY)
5202                 reset_mask |= RADEON_RESET_SEM;
5203
5204         if (tmp & GRBM_RQ_PENDING)
5205                 reset_mask |= RADEON_RESET_GRBM;
5206
5207         if (tmp & VMC_BUSY)
5208                 reset_mask |= RADEON_RESET_VMC;
5209
5210         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5211                    MCC_BUSY | MCD_BUSY))
5212                 reset_mask |= RADEON_RESET_MC;
5213
5214         if (evergreen_is_display_hung(rdev))
5215                 reset_mask |= RADEON_RESET_DISPLAY;
5216
5217         /* Skip MC reset as it's most likely not hung, just busy */
5218         if (reset_mask & RADEON_RESET_MC) {
5219                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5220                 reset_mask &= ~RADEON_RESET_MC;
5221         }
5222
5223         return reset_mask;
5224 }
5225
5226 /**
5227  * cik_gpu_soft_reset - soft reset GPU
5228  *
5229  * @rdev: radeon_device pointer
5230  * @reset_mask: mask of which blocks to reset
5231  *
5232  * Soft reset the blocks specified in @reset_mask.
5233  */
5234 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5235 {
5236         struct evergreen_mc_save save;
5237         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5238         u32 tmp;
5239
5240         if (reset_mask == 0)
5241                 return;
5242
5243         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5244
5245         cik_print_gpu_status_regs(rdev);
5246         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5247                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5248         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5249                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5250
5251         /* disable CG/PG */
5252         cik_fini_pg(rdev);
5253         cik_fini_cg(rdev);
5254
5255         /* stop the rlc */
5256         cik_rlc_stop(rdev);
5257
5258         /* Disable GFX parsing/prefetching */
5259         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5260
5261         /* Disable MEC parsing/prefetching */
5262         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5263
5264         if (reset_mask & RADEON_RESET_DMA) {
5265                 /* sdma0 */
5266                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5267                 tmp |= SDMA_HALT;
5268                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5269         }
5270         if (reset_mask & RADEON_RESET_DMA1) {
5271                 /* sdma1 */
5272                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5273                 tmp |= SDMA_HALT;
5274                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5275         }
5276
5277         evergreen_mc_stop(rdev, &save);
5278         if (evergreen_mc_wait_for_idle(rdev)) {
5279                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5280         }
5281
5282         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5283                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5284
5285         if (reset_mask & RADEON_RESET_CP) {
5286                 grbm_soft_reset |= SOFT_RESET_CP;
5287
5288                 srbm_soft_reset |= SOFT_RESET_GRBM;
5289         }
5290
5291         if (reset_mask & RADEON_RESET_DMA)
5292                 srbm_soft_reset |= SOFT_RESET_SDMA;
5293
5294         if (reset_mask & RADEON_RESET_DMA1)
5295                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5296
5297         if (reset_mask & RADEON_RESET_DISPLAY)
5298                 srbm_soft_reset |= SOFT_RESET_DC;
5299
5300         if (reset_mask & RADEON_RESET_RLC)
5301                 grbm_soft_reset |= SOFT_RESET_RLC;
5302
5303         if (reset_mask & RADEON_RESET_SEM)
5304                 srbm_soft_reset |= SOFT_RESET_SEM;
5305
5306         if (reset_mask & RADEON_RESET_IH)
5307                 srbm_soft_reset |= SOFT_RESET_IH;
5308
5309         if (reset_mask & RADEON_RESET_GRBM)
5310                 srbm_soft_reset |= SOFT_RESET_GRBM;
5311
5312         if (reset_mask & RADEON_RESET_VMC)
5313                 srbm_soft_reset |= SOFT_RESET_VMC;
5314
5315         if (!(rdev->flags & RADEON_IS_IGP)) {
5316                 if (reset_mask & RADEON_RESET_MC)
5317                         srbm_soft_reset |= SOFT_RESET_MC;
5318         }
5319
5320         if (grbm_soft_reset) {
5321                 tmp = RREG32(GRBM_SOFT_RESET);
5322                 tmp |= grbm_soft_reset;
5323                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5324                 WREG32(GRBM_SOFT_RESET, tmp);
5325                 tmp = RREG32(GRBM_SOFT_RESET);
5326
5327                 udelay(50);
5328
5329                 tmp &= ~grbm_soft_reset;
5330                 WREG32(GRBM_SOFT_RESET, tmp);
5331                 tmp = RREG32(GRBM_SOFT_RESET);
5332         }
5333
5334         if (srbm_soft_reset) {
5335                 tmp = RREG32(SRBM_SOFT_RESET);
5336                 tmp |= srbm_soft_reset;
5337                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5338                 WREG32(SRBM_SOFT_RESET, tmp);
5339                 tmp = RREG32(SRBM_SOFT_RESET);
5340
5341                 udelay(50);
5342
5343                 tmp &= ~srbm_soft_reset;
5344                 WREG32(SRBM_SOFT_RESET, tmp);
5345                 tmp = RREG32(SRBM_SOFT_RESET);
5346         }
5347
5348         /* Wait a little for things to settle down */
5349         udelay(50);
5350
5351         evergreen_mc_resume(rdev, &save);
5352         udelay(50);
5353
5354         cik_print_gpu_status_regs(rdev);
5355 }
5356
5357 struct kv_reset_save_regs {
5358         u32 gmcon_reng_execute;
5359         u32 gmcon_misc;
5360         u32 gmcon_misc3;
5361 };
5362
5363 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5364                                    struct kv_reset_save_regs *save)
5365 {
5366         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5367         save->gmcon_misc = RREG32(GMCON_MISC);
5368         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5369
5370         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5371         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5372                                                 STCTRL_STUTTER_EN));
5373 }
5374
5375 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5376                                       struct kv_reset_save_regs *save)
5377 {
5378         int i;
5379
5380         WREG32(GMCON_PGFSM_WRITE, 0);
5381         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5382
5383         for (i = 0; i < 5; i++)
5384                 WREG32(GMCON_PGFSM_WRITE, 0);
5385
5386         WREG32(GMCON_PGFSM_WRITE, 0);
5387         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5388
5389         for (i = 0; i < 5; i++)
5390                 WREG32(GMCON_PGFSM_WRITE, 0);
5391
5392         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5393         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5394
5395         for (i = 0; i < 5; i++)
5396                 WREG32(GMCON_PGFSM_WRITE, 0);
5397
5398         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5399         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5400
5401         for (i = 0; i < 5; i++)
5402                 WREG32(GMCON_PGFSM_WRITE, 0);
5403
5404         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5405         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5406
5407         for (i = 0; i < 5; i++)
5408                 WREG32(GMCON_PGFSM_WRITE, 0);
5409
5410         WREG32(GMCON_PGFSM_WRITE, 0);
5411         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5412
5413         for (i = 0; i < 5; i++)
5414                 WREG32(GMCON_PGFSM_WRITE, 0);
5415
5416         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5417         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5418
5419         for (i = 0; i < 5; i++)
5420                 WREG32(GMCON_PGFSM_WRITE, 0);
5421
5422         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5423         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5424
5425         for (i = 0; i < 5; i++)
5426                 WREG32(GMCON_PGFSM_WRITE, 0);
5427
5428         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5429         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5430
5431         for (i = 0; i < 5; i++)
5432                 WREG32(GMCON_PGFSM_WRITE, 0);
5433
5434         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5435         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5436
5437         for (i = 0; i < 5; i++)
5438                 WREG32(GMCON_PGFSM_WRITE, 0);
5439
5440         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5441         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5442
5443         WREG32(GMCON_MISC3, save->gmcon_misc3);
5444         WREG32(GMCON_MISC, save->gmcon_misc);
5445         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5446 }
5447
5448 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5449 {
5450         struct evergreen_mc_save save;
5451         struct kv_reset_save_regs kv_save = { 0 };
5452         u32 tmp, i;
5453
5454         dev_info(rdev->dev, "GPU pci config reset\n");
5455
5456         /* disable dpm? */
5457
5458         /* disable cg/pg */
5459         cik_fini_pg(rdev);
5460         cik_fini_cg(rdev);
5461
5462         /* Disable GFX parsing/prefetching */
5463         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5464
5465         /* Disable MEC parsing/prefetching */
5466         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5467
5468         /* sdma0 */
5469         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5470         tmp |= SDMA_HALT;
5471         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5472         /* sdma1 */
5473         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5474         tmp |= SDMA_HALT;
5475         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5476         /* XXX other engines? */
5477
5478         /* halt the rlc, disable cp internal ints */
5479         cik_rlc_stop(rdev);
5480
5481         udelay(50);
5482
5483         /* disable mem access */
5484         evergreen_mc_stop(rdev, &save);
5485         if (evergreen_mc_wait_for_idle(rdev)) {
5486                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5487         }
5488
5489         if (rdev->flags & RADEON_IS_IGP)
5490                 kv_save_regs_for_reset(rdev, &kv_save);
5491
5492         /* disable BM */
5493         pci_clear_master(rdev->pdev);
5494         /* reset */
5495         radeon_pci_config_reset(rdev);
5496
5497         udelay(100);
5498
5499         /* wait for asic to come out of reset */
5500         for (i = 0; i < rdev->usec_timeout; i++) {
5501                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5502                         break;
5503                 udelay(1);
5504         }
5505
5506         /* does asic init need to be run first??? */
5507         if (rdev->flags & RADEON_IS_IGP)
5508                 kv_restore_regs_for_reset(rdev, &kv_save);
5509 }
5510
5511 /**
5512  * cik_asic_reset - soft reset GPU
5513  *
5514  * @rdev: radeon_device pointer
5515  *
5516  * Look up which blocks are hung and attempt
5517  * to reset them.
5518  * Returns 0 for success.
5519  */
5520 int cik_asic_reset(struct radeon_device *rdev)
5521 {
5522         u32 reset_mask;
5523
5524         reset_mask = cik_gpu_check_soft_reset(rdev);
5525
5526         if (reset_mask)
5527                 r600_set_bios_scratch_engine_hung(rdev, true);
5528
5529         /* try soft reset */
5530         cik_gpu_soft_reset(rdev, reset_mask);
5531
5532         reset_mask = cik_gpu_check_soft_reset(rdev);
5533
5534         /* try pci config reset */
5535         if (reset_mask && radeon_hard_reset)
5536                 cik_gpu_pci_config_reset(rdev);
5537
5538         reset_mask = cik_gpu_check_soft_reset(rdev);
5539
5540         if (!reset_mask)
5541                 r600_set_bios_scratch_engine_hung(rdev, false);
5542
5543         return 0;
5544 }
5545
5546 /**
5547  * cik_gfx_is_lockup - check if the 3D engine is locked up
5548  *
5549  * @rdev: radeon_device pointer
5550  * @ring: radeon_ring structure holding ring information
5551  *
5552  * Check if the 3D engine is locked up (CIK).
5553  * Returns true if the engine is locked, false if not.
5554  */
5555 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5556 {
5557         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5558
5559         if (!(reset_mask & (RADEON_RESET_GFX |
5560                             RADEON_RESET_COMPUTE |
5561                             RADEON_RESET_CP))) {
5562                 radeon_ring_lockup_update(rdev, ring);
5563                 return false;
5564         }
5565         return radeon_ring_test_lockup(rdev, ring);
5566 }
5567
5568 /* MC */
5569 /**
5570  * cik_mc_program - program the GPU memory controller
5571  *
5572  * @rdev: radeon_device pointer
5573  *
5574  * Set the location of vram, gart, and AGP in the GPU's
5575  * physical address space (CIK).
5576  */
5577 static void cik_mc_program(struct radeon_device *rdev)
5578 {
5579         struct evergreen_mc_save save;
5580         u32 tmp;
5581         int i, j;
5582
5583         /* Initialize HDP */
5584         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5585                 WREG32((0x2c14 + j), 0x00000000);
5586                 WREG32((0x2c18 + j), 0x00000000);
5587                 WREG32((0x2c1c + j), 0x00000000);
5588                 WREG32((0x2c20 + j), 0x00000000);
5589                 WREG32((0x2c24 + j), 0x00000000);
5590         }
5591         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5592
5593         evergreen_mc_stop(rdev, &save);
5594         if (radeon_mc_wait_for_idle(rdev)) {
5595                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5596         }
5597         /* Lockout access through VGA aperture*/
5598         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5599         /* Update configuration */
5600         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5601                rdev->mc.vram_start >> 12);
5602         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5603                rdev->mc.vram_end >> 12);
5604         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5605                rdev->vram_scratch.gpu_addr >> 12);
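             /* MC_VM_FB_LOCATION packs the FB top (bits 31:16) and base (bits 15:0), both in 16 MB units (addr >> 24) */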
5606         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5607         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5608         WREG32(MC_VM_FB_LOCATION, tmp);
5609         /* XXX double check these! */
5610         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5611         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5612         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5613         WREG32(MC_VM_AGP_BASE, 0);
5614         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5615         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5616         if (radeon_mc_wait_for_idle(rdev)) {
5617                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5618         }
5619         evergreen_mc_resume(rdev, &save);
5620         /* we need to own VRAM, so turn off the VGA renderer here
5621          * to stop it overwriting our objects */
5622         rv515_vga_render_disable(rdev);
5623 }
5624
5625 /**
5626  * cik_mc_init - initialize the memory controller driver params
5627  *
5628  * @rdev: radeon_device pointer
5629  *
5630  * Look up the amount of vram, vram width, and decide how to place
5631  * vram and gart within the GPU's physical address space (CIK).
5632  * Returns 0 for success.
5633  */
5634 static int cik_mc_init(struct radeon_device *rdev)
5635 {
5636         u32 tmp;
5637         int chansize, numchan;
5638
5639         /* Get VRAM information */
5640         rdev->mc.vram_is_ddr = true;
5641         tmp = RREG32(MC_ARB_RAMCFG);
5642         if (tmp & CHANSIZE_MASK) {
5643                 chansize = 64;
5644         } else {
5645                 chansize = 32;
5646         }
5647         tmp = RREG32(MC_SHARED_CHMAP);
5648         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5649         case 0:
5650         default:
5651                 numchan = 1;
5652                 break;
5653         case 1:
5654                 numchan = 2;
5655                 break;
5656         case 2:
5657                 numchan = 4;
5658                 break;
5659         case 3:
5660                 numchan = 8;
5661                 break;
5662         case 4:
5663                 numchan = 3;
5664                 break;
5665         case 5:
5666                 numchan = 6;
5667                 break;
5668         case 6:
5669                 numchan = 10;
5670                 break;
5671         case 7:
5672                 numchan = 12;
5673                 break;
5674         case 8:
5675                 numchan = 16;
5676                 break;
5677         }
5678         rdev->mc.vram_width = numchan * chansize;
5679         /* Could aper size report 0 ? */
5680         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5681         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5682         /* CONFIG_MEMSIZE reports the VRAM size in MB */
5683         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5684         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5685         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5686         si_vram_gtt_location(rdev, &rdev->mc);
5687         radeon_update_bandwidth_info(rdev);
5688
5689         return 0;
5690 }
5691
5692 /*
5693  * GART
5694  * VMID 0 is the physical GPU addresses as used by the kernel.
5695  * VMIDs 1-15 are used for userspace clients and are handled
5696  * by the radeon vm/hsa code.
5697  */
5698 /**
5699  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5700  *
5701  * @rdev: radeon_device pointer
5702  *
5703  * Flush the TLB for the VMID 0 page table (CIK).
5704  */
5705 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5706 {
5707         /* flush hdp cache */
5708         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5709
5710         /* bits 0-15 are the VM contexts 0-15 */
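             /* writing bit 0 invalidates only the VMID 0 (GART) mapping */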
5711         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5712 }
5713
5714 /**
5715  * cik_pcie_gart_enable - gart enable
5716  *
5717  * @rdev: radeon_device pointer
5718  *
5719  * This sets up the TLBs, programs the page tables for VMID0,
5720  * sets up the hw for VMIDs 1-15 which are allocated on
5721  * demand, and sets up the global locations for the LDS, GDS,
5722  * and GPUVM for FSA64 clients (CIK).
5723  * Returns 0 for success, errors for failure.
5724  */
5725 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5726 {
5727         int r, i;
5728
5729         if (rdev->gart.robj == NULL) {
5730                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5731                 return -EINVAL;
5732         }
5733         r = radeon_gart_table_vram_pin(rdev);
5734         if (r)
5735                 return r;
5736         /* Setup TLB control */
5737         WREG32(MC_VM_MX_L1_TLB_CNTL,
5738                (0xA << 7) |
5739                ENABLE_L1_TLB |
5740                ENABLE_L1_FRAGMENT_PROCESSING |
5741                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5742                ENABLE_ADVANCED_DRIVER_MODEL |
5743                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5744         /* Setup L2 cache */
5745         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5746                ENABLE_L2_FRAGMENT_PROCESSING |
5747                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5748                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5749                EFFECTIVE_L2_QUEUE_SIZE(7) |
5750                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5751         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5752         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5753                BANK_SELECT(4) |
5754                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5755         /* setup context0 */
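             /* start, end and base are programmed as 4 KB page frame numbers (addr >> 12) */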
5756         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5757         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5758         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5759         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5760                         (u32)(rdev->dummy_page.addr >> 12));
5761         WREG32(VM_CONTEXT0_CNTL2, 0);
5762         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5763                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5764
5765         WREG32(0x15D4, 0);
5766         WREG32(0x15D8, 0);
5767         WREG32(0x15DC, 0);
5768
5769         /* restore context1-15 */
5770         /* set vm size, must be a multiple of 4 */
5771         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5772         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5773         for (i = 1; i < 16; i++) {
5774                 if (i < 8)
5775                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5776                                rdev->vm_manager.saved_table_addr[i]);
5777                 else
5778                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5779                                rdev->vm_manager.saved_table_addr[i]);
5780         }
5781
5782         /* enable context1-15 */
5783         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5784                (u32)(rdev->dummy_page.addr >> 12));
5785         WREG32(VM_CONTEXT1_CNTL2, 4);
5786         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5787                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5788                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5789                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5790                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5791                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5792                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5793                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5794                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5795                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5796                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5797                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5798                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5799                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5800
5801         if (rdev->family == CHIP_KAVERI) {
5802                 u32 tmp = RREG32(CHUB_CONTROL);
5803                 tmp &= ~BYPASS_VM;
5804                 WREG32(CHUB_CONTROL, tmp);
5805         }
5806
5807         /* XXX SH_MEM regs */
5808         /* where to put LDS, scratch, GPUVM in FSA64 space */
5809         mutex_lock(&rdev->srbm_mutex);
5810         for (i = 0; i < 16; i++) {
5811                 cik_srbm_select(rdev, 0, 0, 0, i);
5812                 /* CP and shaders */
5813                 WREG32(SH_MEM_CONFIG, 0);
5814                 WREG32(SH_MEM_APE1_BASE, 1);
5815                 WREG32(SH_MEM_APE1_LIMIT, 0);
5816                 WREG32(SH_MEM_BASES, 0);
5817                 /* SDMA GFX */
5818                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5819                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5820                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5821                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5822                 /* XXX SDMA RLC - todo */
5823         }
5824         cik_srbm_select(rdev, 0, 0, 0, 0);
5825         mutex_unlock(&rdev->srbm_mutex);
5826
5827         cik_pcie_gart_tlb_flush(rdev);
5828         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5829                  (unsigned)(rdev->mc.gtt_size >> 20),
5830                  (unsigned long long)rdev->gart.table_addr);
5831         rdev->gart.ready = true;
5832         return 0;
5833 }
5834
5835 /**
5836  * cik_pcie_gart_disable - gart disable
5837  *
5838  * @rdev: radeon_device pointer
5839  *
5840  * This disables all VM page tables (CIK).
5841  */
5842 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5843 {
5844         unsigned i;
5845
5846         for (i = 1; i < 16; ++i) {
5847                 uint32_t reg;
5848                 if (i < 8)
5849                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5850                 else
5851                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5852                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5853         }
5854
5855         /* Disable all tables */
5856         WREG32(VM_CONTEXT0_CNTL, 0);
5857         WREG32(VM_CONTEXT1_CNTL, 0);
5858         /* Setup TLB control */
5859         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5860                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5861         /* Setup L2 cache */
5862         WREG32(VM_L2_CNTL,
5863                ENABLE_L2_FRAGMENT_PROCESSING |
5864                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5865                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5866                EFFECTIVE_L2_QUEUE_SIZE(7) |
5867                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5868         WREG32(VM_L2_CNTL2, 0);
5869         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5870                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5871         radeon_gart_table_vram_unpin(rdev);
5872 }
5873
5874 /**
5875  * cik_pcie_gart_fini - vm fini callback
5876  *
5877  * @rdev: radeon_device pointer
5878  *
5879  * Tears down the driver GART/VM setup (CIK).
5880  */
5881 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5882 {
5883         cik_pcie_gart_disable(rdev);
5884         radeon_gart_table_vram_free(rdev);
5885         radeon_gart_fini(rdev);
5886 }
5887
5888 /* vm parser */
5889 /**
5890  * cik_ib_parse - vm ib_parse callback
5891  *
5892  * @rdev: radeon_device pointer
5893  * @ib: indirect buffer pointer
5894  *
5895  * CIK uses hw IB checking so this is a nop (CIK).
5896  */
5897 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5898 {
5899         return 0;
5900 }
5901
5902 /*
5903  * vm
5904  * VMID 0 is the physical GPU addresses as used by the kernel.
5905  * VMIDs 1-15 are used for userspace clients and are handled
5906  * by the radeon vm/hsa code.
5907  */
5908 /**
5909  * cik_vm_init - cik vm init callback
5910  *
5911  * @rdev: radeon_device pointer
5912  *
5913  * Inits cik specific vm parameters (number of VMs, base of vram for
5914  * VMIDs 1-15) (CIK).
5915  * Returns 0 for success.
5916  */
5917 int cik_vm_init(struct radeon_device *rdev)
5918 {
5919         /*
5920          * number of VMs
5921          * VMID 0 is reserved for System
5922          * radeon graphics/compute will use VMIDs 1-7
5923          * amdkfd will use VMIDs 8-15
5924          */
5925         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5926         /* base offset of vram pages */
5927         if (rdev->flags & RADEON_IS_IGP) {
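                     /* MC_VM_FB_OFFSET holds the FB offset in 4 MB units; shift left by 22 to get a byte address */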
5928                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5929                 tmp <<= 22;
5930                 rdev->vm_manager.vram_base_offset = tmp;
5931         } else
5932                 rdev->vm_manager.vram_base_offset = 0;
5933
5934         return 0;
5935 }
5936
5937 /**
5938  * cik_vm_fini - cik vm fini callback
5939  *
5940  * @rdev: radeon_device pointer
5941  *
5942  * Tear down any asic specific VM setup (CIK).
5943  */
5944 void cik_vm_fini(struct radeon_device *rdev)
5945 {
5946 }
5947
5948 /**
5949  * cik_vm_decode_fault - print human readable fault info
5950  *
5951  * @rdev: radeon_device pointer
5952  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5953  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
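      * @mc_client: memory client id for the fault (used to decode the client block name)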
5954  *
5955  * Print human readable fault information (CIK).
5956  */
5957 static void cik_vm_decode_fault(struct radeon_device *rdev,
5958                                 u32 status, u32 addr, u32 mc_client)
5959 {
5960         u32 mc_id;
5961         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5962         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5963         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5964                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5965
5966         if (rdev->family == CHIP_HAWAII)
5967                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5968         else
5969                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5970
5971         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5972                protections, vmid, addr,
5973                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5974                block, mc_client, mc_id);
5975 }
5976
5977 /**
5978  * cik_vm_flush - cik vm flush using the CP
5979  *
5980  * @rdev: radeon_device pointer
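      * @ring: radeon_ring pointer
      * @vm_id: VMID to flush
      * @pd_addr: page directory base address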
5981  *
5982  * Update the page table base and flush the VM TLB
5983  * using the CP (CIK).
5984  */
5985 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5986                   unsigned vm_id, uint64_t pd_addr)
5987 {
5988         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5989
5990         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5991         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5992                                  WRITE_DATA_DST_SEL(0)));
5993         if (vm_id < 8) {
5994                 radeon_ring_write(ring,
5995                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5996         } else {
5997                 radeon_ring_write(ring,
5998                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5999         }
6000         radeon_ring_write(ring, 0);
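             /* the page table base register takes a 4 KB-aligned address, hence pd_addr >> 12 */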
6001         radeon_ring_write(ring, pd_addr >> 12);
6002
6003         /* update SH_MEM_* regs */
6004         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6005         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6006                                  WRITE_DATA_DST_SEL(0)));
6007         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6008         radeon_ring_write(ring, 0);
6009         radeon_ring_write(ring, VMID(vm_id));
6010
6011         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6012         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6013                                  WRITE_DATA_DST_SEL(0)));
6014         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6015         radeon_ring_write(ring, 0);
6016
6017         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6018         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6019         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6020         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6021
6022         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6023         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6024                                  WRITE_DATA_DST_SEL(0)));
6025         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6026         radeon_ring_write(ring, 0);
6027         radeon_ring_write(ring, VMID(0));
6028
6029         /* HDP flush */
6030         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6031
6032         /* bits 0-15 are the VM contexts 0-15 */
6033         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6034         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6035                                  WRITE_DATA_DST_SEL(0)));
6036         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6037         radeon_ring_write(ring, 0);
6038         radeon_ring_write(ring, 1 << vm_id);
6039
6040         /* compute doesn't have PFP */
6041         if (usepfp) {
6042                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6043                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6044                 radeon_ring_write(ring, 0x0);
6045         }
6046 }
6047
6048 /*
6049  * RLC
6050  * The RLC is a multi-purpose microengine that handles a
6051  * variety of functions, the most important of which is
6052  * the interrupt controller.
6053  */
6054 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6055                                           bool enable)
6056 {
6057         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6058
6059         if (enable)
6060                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6061         else
6062                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6063         WREG32(CP_INT_CNTL_RING0, tmp);
6064 }
6065
6066 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6067 {
6068         u32 tmp;
6069
6070         tmp = RREG32(RLC_LB_CNTL);
6071         if (enable)
6072                 tmp |= LOAD_BALANCE_ENABLE;
6073         else
6074                 tmp &= ~LOAD_BALANCE_ENABLE;
6075         WREG32(RLC_LB_CNTL, tmp);
6076 }
6077
6078 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6079 {
6080         u32 i, j, k;
6081         u32 mask;
6082
6083         mutex_lock(&rdev->grbm_idx_mutex);
6084         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6085                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6086                         cik_select_se_sh(rdev, i, j);
6087                         for (k = 0; k < rdev->usec_timeout; k++) {
6088                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6089                                         break;
6090                                 udelay(1);
6091                         }
6092                 }
6093         }
6094         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6095         mutex_unlock(&rdev->grbm_idx_mutex);
6096
6097         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6098         for (k = 0; k < rdev->usec_timeout; k++) {
6099                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6100                         break;
6101                 udelay(1);
6102         }
6103 }
6104
6105 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6106 {
6107         u32 tmp;
6108
6109         tmp = RREG32(RLC_CNTL);
6110         if (tmp != rlc)
6111                 WREG32(RLC_CNTL, rlc);
6112 }
6113
6114 static u32 cik_halt_rlc(struct radeon_device *rdev)
6115 {
6116         u32 data, orig;
6117
6118         orig = data = RREG32(RLC_CNTL);
6119
6120         if (data & RLC_ENABLE) {
6121                 u32 i;
6122
6123                 data &= ~RLC_ENABLE;
6124                 WREG32(RLC_CNTL, data);
6125
6126                 for (i = 0; i < rdev->usec_timeout; i++) {
6127                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6128                                 break;
6129                         udelay(1);
6130                 }
6131
6132                 cik_wait_for_rlc_serdes(rdev);
6133         }
6134
6135         return orig;
6136 }
6137
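/**
 * cik_enter_rlc_safe_mode - put the RLC into safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Send the enter safe mode message via RLC_GPR_REG2 and wait for
 * the GFX power/clock status bits and for the request to be
 * acknowledged (CIK).
 */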
6138 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6139 {
6140         u32 tmp, i, mask;
6141
6142         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6143         WREG32(RLC_GPR_REG2, tmp);
6144
6145         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6146         for (i = 0; i < rdev->usec_timeout; i++) {
6147                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6148                         break;
6149                 udelay(1);
6150         }
6151
6152         for (i = 0; i < rdev->usec_timeout; i++) {
6153                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6154                         break;
6155                 udelay(1);
6156         }
6157 }
6158
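/**
 * cik_exit_rlc_safe_mode - take the RLC out of safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Send the exit safe mode message via RLC_GPR_REG2 (CIK).
 */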
6159 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6160 {
6161         u32 tmp;
6162
6163         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6164         WREG32(RLC_GPR_REG2, tmp);
6165 }
6166
6167 /**
6168  * cik_rlc_stop - stop the RLC ME
6169  *
6170  * @rdev: radeon_device pointer
6171  *
6172  * Halt the RLC ME (MicroEngine) (CIK).
6173  */
6174 static void cik_rlc_stop(struct radeon_device *rdev)
6175 {
6176         WREG32(RLC_CNTL, 0);
6177
6178         cik_enable_gui_idle_interrupt(rdev, false);
6179
6180         cik_wait_for_rlc_serdes(rdev);
6181 }
6182
6183 /**
6184  * cik_rlc_start - start the RLC ME
6185  *
6186  * @rdev: radeon_device pointer
6187  *
6188  * Unhalt the RLC ME (MicroEngine) (CIK).
6189  */
6190 static void cik_rlc_start(struct radeon_device *rdev)
6191 {
6192         WREG32(RLC_CNTL, RLC_ENABLE);
6193
6194         cik_enable_gui_idle_interrupt(rdev, true);
6195
6196         udelay(50);
6197 }
6198
6199 /**
6200  * cik_rlc_resume - setup the RLC hw
6201  *
6202  * @rdev: radeon_device pointer
6203  *
6204  * Initialize the RLC registers, load the ucode,
6205  * and start the RLC (CIK).
6206  * Returns 0 for success, -EINVAL if the ucode is not available.
6207  */
6208 static int cik_rlc_resume(struct radeon_device *rdev)
6209 {
6210         u32 i, size, tmp;
6211
6212         if (!rdev->rlc_fw)
6213                 return -EINVAL;
6214
6215         cik_rlc_stop(rdev);
6216
6217         /* disable CG */
6218         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6219         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6220
6221         si_rlc_reset(rdev);
6222
6223         cik_init_pg(rdev);
6224
6225         cik_init_cg(rdev);
6226
6227         WREG32(RLC_LB_CNTR_INIT, 0);
6228         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6229
6230         mutex_lock(&rdev->grbm_idx_mutex);
6231         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6232         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6233         WREG32(RLC_LB_PARAMS, 0x00600408);
6234         WREG32(RLC_LB_CNTL, 0x80000004);
6235         mutex_unlock(&rdev->grbm_idx_mutex);
6236
6237         WREG32(RLC_MC_CNTL, 0);
6238         WREG32(RLC_UCODE_CNTL, 0);
6239
6240         if (rdev->new_fw) {
6241                 const struct rlc_firmware_header_v1_0 *hdr =
6242                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6243                 const __le32 *fw_data = (const __le32 *)
6244                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6245
6246                 radeon_ucode_print_rlc_hdr(&hdr->header);
6247
6248                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6249                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6250                 for (i = 0; i < size; i++)
6251                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6252                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6253         } else {
6254                 const __be32 *fw_data;
6255
6256                 switch (rdev->family) {
6257                 case CHIP_BONAIRE:
6258                 case CHIP_HAWAII:
6259                 default:
6260                         size = BONAIRE_RLC_UCODE_SIZE;
6261                         break;
6262                 case CHIP_KAVERI:
6263                         size = KV_RLC_UCODE_SIZE;
6264                         break;
6265                 case CHIP_KABINI:
6266                         size = KB_RLC_UCODE_SIZE;
6267                         break;
6268                 case CHIP_MULLINS:
6269                         size = ML_RLC_UCODE_SIZE;
6270                         break;
6271                 }
6272
6273                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6274                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6275                 for (i = 0; i < size; i++)
6276                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6277                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6278         }
6279
6280         /* XXX - find out what chips support lbpw */
6281         cik_enable_lbpw(rdev, false);
6282
6283         if (rdev->family == CHIP_BONAIRE)
6284                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6285
6286         cik_rlc_start(rdev);
6287
6288         return 0;
6289 }
6290
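/**
 * cik_enable_cgcg - enable/disable coarse grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable CGCG
 *
 * Program the RLC serdes and RLC_CGCG_CGLS_CTRL to enable or
 * disable coarse grain clock gating and coarse grain light
 * sleep for the GFX block (CIK).
 */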
6291 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6292 {
6293         u32 data, orig, tmp, tmp2;
6294
6295         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6296
6297         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6298                 cik_enable_gui_idle_interrupt(rdev, true);
6299
6300                 tmp = cik_halt_rlc(rdev);
6301
6302                 mutex_lock(&rdev->grbm_idx_mutex);
6303                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6304                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6305                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6306                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6307                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6308                 mutex_unlock(&rdev->grbm_idx_mutex);
6309
6310                 cik_update_rlc(rdev, tmp);
6311
6312                 data |= CGCG_EN | CGLS_EN;
6313         } else {
6314                 cik_enable_gui_idle_interrupt(rdev, false);
6315
6316                 RREG32(CB_CGTT_SCLK_CTRL);
6317                 RREG32(CB_CGTT_SCLK_CTRL);
6318                 RREG32(CB_CGTT_SCLK_CTRL);
6319                 RREG32(CB_CGTT_SCLK_CTRL);
6320
6321                 data &= ~(CGCG_EN | CGLS_EN);
6322         }
6323
6324         if (orig != data)
6325                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6326
6327 }
6328
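/**
 * cik_enable_mgcg - enable/disable medium grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MGCG
 *
 * Program the CP/RLC memory light sleep controls, the MGCG
 * overrides and CGTS_SM_CTRL_REG to enable or disable medium
 * grain clock gating for the GFX block (CIK).
 */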
6329 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6330 {
6331         u32 data, orig, tmp = 0;
6332
6333         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6334                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6335                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6336                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6337                                 data |= CP_MEM_LS_EN;
6338                                 if (orig != data)
6339                                         WREG32(CP_MEM_SLP_CNTL, data);
6340                         }
6341                 }
6342
6343                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6344                 data |= 0x00000001;
6345                 data &= 0xfffffffd;
6346                 if (orig != data)
6347                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6348
6349                 tmp = cik_halt_rlc(rdev);
6350
6351                 mutex_lock(&rdev->grbm_idx_mutex);
6352                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6353                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6354                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6355                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6356                 WREG32(RLC_SERDES_WR_CTRL, data);
6357                 mutex_unlock(&rdev->grbm_idx_mutex);
6358
6359                 cik_update_rlc(rdev, tmp);
6360
6361                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6362                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6363                         data &= ~SM_MODE_MASK;
6364                         data |= SM_MODE(0x2);
6365                         data |= SM_MODE_ENABLE;
6366                         data &= ~CGTS_OVERRIDE;
6367                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6368                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6369                                 data &= ~CGTS_LS_OVERRIDE;
6370                         data &= ~ON_MONITOR_ADD_MASK;
6371                         data |= ON_MONITOR_ADD_EN;
6372                         data |= ON_MONITOR_ADD(0x96);
6373                         if (orig != data)
6374                                 WREG32(CGTS_SM_CTRL_REG, data);
6375                 }
6376         } else {
6377                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6378                 data |= 0x00000003;
6379                 if (orig != data)
6380                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6381
6382                 data = RREG32(RLC_MEM_SLP_CNTL);
6383                 if (data & RLC_MEM_LS_EN) {
6384                         data &= ~RLC_MEM_LS_EN;
6385                         WREG32(RLC_MEM_SLP_CNTL, data);
6386                 }
6387
6388                 data = RREG32(CP_MEM_SLP_CNTL);
6389                 if (data & CP_MEM_LS_EN) {
6390                         data &= ~CP_MEM_LS_EN;
6391                         WREG32(CP_MEM_SLP_CNTL, data);
6392                 }
6393
6394                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6395                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6396                 if (orig != data)
6397                         WREG32(CGTS_SM_CTRL_REG, data);
6398
6399                 tmp = cik_halt_rlc(rdev);
6400
6401                 mutex_lock(&rdev->grbm_idx_mutex);
6402                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6403                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6404                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6405                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6406                 WREG32(RLC_SERDES_WR_CTRL, data);
6407                 mutex_unlock(&rdev->grbm_idx_mutex);
6408
6409                 cik_update_rlc(rdev, tmp);
6410         }
6411 }
6412
6413 static const u32 mc_cg_registers[] =
6414 {
6415         MC_HUB_MISC_HUB_CG,
6416         MC_HUB_MISC_SIP_CG,
6417         MC_HUB_MISC_VM_CG,
6418         MC_XPB_CLK_GAT,
6419         ATC_MISC_CG,
6420         MC_CITF_MISC_WR_CG,
6421         MC_CITF_MISC_RD_CG,
6422         MC_CITF_MISC_VM_CG,
6423         VM_L2_CG,
6424 };
6425
6426 static void cik_enable_mc_ls(struct radeon_device *rdev,
6427                              bool enable)
6428 {
6429         int i;
6430         u32 orig, data;
6431
6432         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6433                 orig = data = RREG32(mc_cg_registers[i]);
6434                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6435                         data |= MC_LS_ENABLE;
6436                 else
6437                         data &= ~MC_LS_ENABLE;
6438                 if (data != orig)
6439                         WREG32(mc_cg_registers[i], data);
6440         }
6441 }
6442
6443 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6444                                bool enable)
6445 {
6446         int i;
6447         u32 orig, data;
6448
6449         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6450                 orig = data = RREG32(mc_cg_registers[i]);
6451                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6452                         data |= MC_CG_ENABLE;
6453                 else
6454                         data &= ~MC_CG_ENABLE;
6455                 if (data != orig)
6456                         WREG32(mc_cg_registers[i], data);
6457         }
6458 }
6459
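/**
 * cik_enable_sdma_mgcg - enable/disable SDMA clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MGCG
 *
 * Enable or disable medium grain clock gating for both SDMA
 * engines via SDMA0/1_CLK_CTRL (CIK).
 */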
6460 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6461                                  bool enable)
6462 {
6463         u32 orig, data;
6464
6465         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6466                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6467                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6468         } else {
6469                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6470                 data |= 0xff000000;
6471                 if (data != orig)
6472                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6473
6474                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6475                 data |= 0xff000000;
6476                 if (data != orig)
6477                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6478         }
6479 }
6480
6481 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6482                                  bool enable)
6483 {
6484         u32 orig, data;
6485
6486         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6487                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6488                 data |= 0x100;
6489                 if (orig != data)
6490                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6491
6492                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6493                 data |= 0x100;
6494                 if (orig != data)
6495                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6496         } else {
6497                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6498                 data &= ~0x100;
6499                 if (orig != data)
6500                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6501
6502                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6503                 data &= ~0x100;
6504                 if (orig != data)
6505                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6506         }
6507 }
6508
6509 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6510                                 bool enable)
6511 {
6512         u32 orig, data;
6513
6514         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6515                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6516                 data = 0xfff;
6517                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6518
6519                 orig = data = RREG32(UVD_CGC_CTRL);
6520                 data |= DCM;
6521                 if (orig != data)
6522                         WREG32(UVD_CGC_CTRL, data);
6523         } else {
6524                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6525                 data &= ~0xfff;
6526                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6527
6528                 orig = data = RREG32(UVD_CGC_CTRL);
6529                 data &= ~DCM;
6530                 if (orig != data)
6531                         WREG32(UVD_CGC_CTRL, data);
6532         }
6533 }
6534
6535 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6536                                bool enable)
6537 {
6538         u32 orig, data;
6539
6540         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6541
6542         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6543                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6544                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6545         else
6546                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6547                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6548
6549         if (orig != data)
6550                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6551 }
6552
6553 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6554                                 bool enable)
6555 {
6556         u32 orig, data;
6557
6558         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6559
6560         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6561                 data &= ~CLOCK_GATING_DIS;
6562         else
6563                 data |= CLOCK_GATING_DIS;
6564
6565         if (orig != data)
6566                 WREG32(HDP_HOST_PATH_CNTL, data);
6567 }
6568
6569 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6570                               bool enable)
6571 {
6572         u32 orig, data;
6573
6574         orig = data = RREG32(HDP_MEM_POWER_LS);
6575
6576         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6577                 data |= HDP_LS_ENABLE;
6578         else
6579                 data &= ~HDP_LS_ENABLE;
6580
6581         if (orig != data)
6582                 WREG32(HDP_MEM_POWER_LS, data);
6583 }
6584
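/**
 * cik_update_cg - enable/disable clock gating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: RADEON_CG_BLOCK_* mask of blocks to update
 * @enable: enable or disable clock gating
 *
 * Enable or disable clock gating for the GFX, MC, SDMA, BIF,
 * UVD, HDP and VCE blocks selected by @block (CIK).
 */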
6585 void cik_update_cg(struct radeon_device *rdev,
6586                    u32 block, bool enable)
6587 {
6588
6589         if (block & RADEON_CG_BLOCK_GFX) {
6590                 cik_enable_gui_idle_interrupt(rdev, false);
6591                 /* order matters! */
6592                 if (enable) {
6593                         cik_enable_mgcg(rdev, true);
6594                         cik_enable_cgcg(rdev, true);
6595                 } else {
6596                         cik_enable_cgcg(rdev, false);
6597                         cik_enable_mgcg(rdev, false);
6598                 }
6599                 cik_enable_gui_idle_interrupt(rdev, true);
6600         }
6601
6602         if (block & RADEON_CG_BLOCK_MC) {
6603                 if (!(rdev->flags & RADEON_IS_IGP)) {
6604                         cik_enable_mc_mgcg(rdev, enable);
6605                         cik_enable_mc_ls(rdev, enable);
6606                 }
6607         }
6608
6609         if (block & RADEON_CG_BLOCK_SDMA) {
6610                 cik_enable_sdma_mgcg(rdev, enable);
6611                 cik_enable_sdma_mgls(rdev, enable);
6612         }
6613
6614         if (block & RADEON_CG_BLOCK_BIF) {
6615                 cik_enable_bif_mgls(rdev, enable);
6616         }
6617
6618         if (block & RADEON_CG_BLOCK_UVD) {
6619                 if (rdev->has_uvd)
6620                         cik_enable_uvd_mgcg(rdev, enable);
6621         }
6622
6623         if (block & RADEON_CG_BLOCK_HDP) {
6624                 cik_enable_hdp_mgcg(rdev, enable);
6625                 cik_enable_hdp_ls(rdev, enable);
6626         }
6627
6628         if (block & RADEON_CG_BLOCK_VCE) {
6629                 vce_v2_0_enable_mgcg(rdev, enable);
6630         }
6631 }
6632
6633 static void cik_init_cg(struct radeon_device *rdev)
6634 {
6635
6636         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6637
6638         if (rdev->has_uvd)
6639                 si_init_uvd_internal_cg(rdev);
6640
6641         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6642                              RADEON_CG_BLOCK_SDMA |
6643                              RADEON_CG_BLOCK_BIF |
6644                              RADEON_CG_BLOCK_UVD |
6645                              RADEON_CG_BLOCK_HDP), true);
6646 }
6647
6648 static void cik_fini_cg(struct radeon_device *rdev)
6649 {
6650         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6651                              RADEON_CG_BLOCK_SDMA |
6652                              RADEON_CG_BLOCK_BIF |
6653                              RADEON_CG_BLOCK_UVD |
6654                              RADEON_CG_BLOCK_HDP), false);
6655
6656         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6657 }
6658
6659 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6660                                           bool enable)
6661 {
6662         u32 data, orig;
6663
6664         orig = data = RREG32(RLC_PG_CNTL);
6665         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6666                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6667         else
6668                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6669         if (orig != data)
6670                 WREG32(RLC_PG_CNTL, data);
6671 }
6672
6673 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6674                                           bool enable)
6675 {
6676         u32 data, orig;
6677
6678         orig = data = RREG32(RLC_PG_CNTL);
6679         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6680                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6681         else
6682                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6683         if (orig != data)
6684                 WREG32(RLC_PG_CNTL, data);
6685 }
6686
6687 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6688 {
6689         u32 data, orig;
6690
6691         orig = data = RREG32(RLC_PG_CNTL);
6692         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6693                 data &= ~DISABLE_CP_PG;
6694         else
6695                 data |= DISABLE_CP_PG;
6696         if (orig != data)
6697                 WREG32(RLC_PG_CNTL, data);
6698 }
6699
6700 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6701 {
6702         u32 data, orig;
6703
6704         orig = data = RREG32(RLC_PG_CNTL);
6705         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6706                 data &= ~DISABLE_GDS_PG;
6707         else
6708                 data |= DISABLE_GDS_PG;
6709         if (orig != data)
6710                 WREG32(RLC_PG_CNTL, data);
6711 }
6712
6713 #define CP_ME_TABLE_SIZE    96
6714 #define CP_ME_TABLE_OFFSET  2048
6715 #define CP_MEC_TABLE_OFFSET 4096
6716
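/**
 * cik_init_cp_pg_table - populate the CP jump table for powergating
 *
 * @rdev: radeon_device pointer
 *
 * Copy the CE, PFP, ME and MEC (and MEC2 where present) ucode jump
 * tables into the RLC CP table buffer used for CP powergating (CIK).
 */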
6717 void cik_init_cp_pg_table(struct radeon_device *rdev)
6718 {
6719         volatile u32 *dst_ptr;
6720         int me, i, max_me = 4;
6721         u32 bo_offset = 0;
6722         u32 table_offset, table_size;
6723
6724         if (rdev->family == CHIP_KAVERI)
6725                 max_me = 5;
6726
6727         if (rdev->rlc.cp_table_ptr == NULL)
6728                 return;
6729
6730         /* write the cp table buffer */
6731         dst_ptr = rdev->rlc.cp_table_ptr;
6732         for (me = 0; me < max_me; me++) {
6733                 if (rdev->new_fw) {
6734                         const __le32 *fw_data;
6735                         const struct gfx_firmware_header_v1_0 *hdr;
6736
6737                         if (me == 0) {
6738                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6739                                 fw_data = (const __le32 *)
6740                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6741                                 table_offset = le32_to_cpu(hdr->jt_offset);
6742                                 table_size = le32_to_cpu(hdr->jt_size);
6743                         } else if (me == 1) {
6744                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6745                                 fw_data = (const __le32 *)
6746                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6747                                 table_offset = le32_to_cpu(hdr->jt_offset);
6748                                 table_size = le32_to_cpu(hdr->jt_size);
6749                         } else if (me == 2) {
6750                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6751                                 fw_data = (const __le32 *)
6752                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6753                                 table_offset = le32_to_cpu(hdr->jt_offset);
6754                                 table_size = le32_to_cpu(hdr->jt_size);
6755                         } else if (me == 3) {
6756                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6757                                 fw_data = (const __le32 *)
6758                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6759                                 table_offset = le32_to_cpu(hdr->jt_offset);
6760                                 table_size = le32_to_cpu(hdr->jt_size);
6761                         } else {
6762                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6763                                 fw_data = (const __le32 *)
6764                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6765                                 table_offset = le32_to_cpu(hdr->jt_offset);
6766                                 table_size = le32_to_cpu(hdr->jt_size);
6767                         }
6768
6769                         for (i = 0; i < table_size; i ++) {
6770                                 dst_ptr[bo_offset + i] =
6771                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6772                         }
6773                         bo_offset += table_size;
6774                 } else {
6775                         const __be32 *fw_data;
6776                         table_size = CP_ME_TABLE_SIZE;
6777
6778                         if (me == 0) {
6779                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6780                                 table_offset = CP_ME_TABLE_OFFSET;
6781                         } else if (me == 1) {
6782                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6783                                 table_offset = CP_ME_TABLE_OFFSET;
6784                         } else if (me == 2) {
6785                                 fw_data = (const __be32 *)rdev->me_fw->data;
6786                                 table_offset = CP_ME_TABLE_OFFSET;
6787                         } else {
6788                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6789                                 table_offset = CP_MEC_TABLE_OFFSET;
6790                         }
6791
6792                         for (i = 0; i < table_size; i ++) {
6793                                 dst_ptr[bo_offset + i] =
6794                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6795                         }
6796                         bo_offset += table_size;
6797                 }
6798         }
6799 }
6800
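/**
 * cik_enable_gfx_cgpg - enable/disable GFX powergating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable GFX powergating
 *
 * Set or clear GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in
 * RLC_AUTO_PG_CTRL (CIK).
 */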
6801 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6802                                 bool enable)
6803 {
6804         u32 data, orig;
6805
6806         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6807                 orig = data = RREG32(RLC_PG_CNTL);
6808                 data |= GFX_PG_ENABLE;
6809                 if (orig != data)
6810                         WREG32(RLC_PG_CNTL, data);
6811
6812                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6813                 data |= AUTO_PG_EN;
6814                 if (orig != data)
6815                         WREG32(RLC_AUTO_PG_CTRL, data);
6816         } else {
6817                 orig = data = RREG32(RLC_PG_CNTL);
6818                 data &= ~GFX_PG_ENABLE;
6819                 if (orig != data)
6820                         WREG32(RLC_PG_CNTL, data);
6821
6822                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6823                 data &= ~AUTO_PG_EN;
6824                 if (orig != data)
6825                         WREG32(RLC_AUTO_PG_CTRL, data);
6826
6827                 data = RREG32(DB_RENDER_CONTROL);
6828         }
6829 }
6830
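/**
 * cik_get_cu_active_bitmap - get the bitmap of active CUs
 *
 * @rdev: radeon_device pointer
 * @se: shader engine to query
 * @sh: shader array to query
 *
 * Combine the hw and user shader array configs for the given
 * SE/SH and return a bitmap of the CUs that are not disabled (CIK).
 */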
6831 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6832 {
6833         u32 mask = 0, tmp, tmp1;
6834         int i;
6835
6836         mutex_lock(&rdev->grbm_idx_mutex);
6837         cik_select_se_sh(rdev, se, sh);
6838         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6839         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6840         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6841         mutex_unlock(&rdev->grbm_idx_mutex);
6842
6843         tmp &= 0xffff0000;
6844
6845         tmp |= tmp1;
6846         tmp >>= 16;
6847
6848         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6849                 mask <<= 1;
6850                 mask |= 1;
6851         }
6852
6853         return (~tmp) & mask;
6854 }
6855
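/**
 * cik_init_ao_cu_mask - set up the always-on CU mask
 *
 * @rdev: radeon_device pointer
 *
 * Pick up to two active CUs per SE/SH to keep always on, program
 * RLC_PG_AO_CU_MASK with that bitmap and RLC_MAX_PG_CU with the
 * total number of active CUs (CIK).
 */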
6856 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6857 {
6858         u32 i, j, k, active_cu_number = 0;
6859         u32 mask, counter, cu_bitmap;
6860         u32 tmp = 0;
6861
6862         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6863                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6864                         mask = 1;
6865                         cu_bitmap = 0;
6866                         counter = 0;
6867                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6868                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6869                                         if (counter < 2)
6870                                                 cu_bitmap |= mask;
6871                                         counter ++;
6872                                 }
6873                                 mask <<= 1;
6874                         }
6875
6876                         active_cu_number += counter;
6877                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6878                 }
6879         }
6880
6881         WREG32(RLC_PG_AO_CU_MASK, tmp);
6882
6883         tmp = RREG32(RLC_MAX_PG_CU);
6884         tmp &= ~MAX_PU_CU_MASK;
6885         tmp |= MAX_PU_CU(active_cu_number);
6886         WREG32(RLC_MAX_PG_CU, tmp);
6887 }
6888
6889 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6890                                        bool enable)
6891 {
6892         u32 data, orig;
6893
6894         orig = data = RREG32(RLC_PG_CNTL);
6895         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6896                 data |= STATIC_PER_CU_PG_ENABLE;
6897         else
6898                 data &= ~STATIC_PER_CU_PG_ENABLE;
6899         if (orig != data)
6900                 WREG32(RLC_PG_CNTL, data);
6901 }
6902
6903 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6904                                         bool enable)
6905 {
6906         u32 data, orig;
6907
6908         orig = data = RREG32(RLC_PG_CNTL);
6909         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6910                 data |= DYN_PER_CU_PG_ENABLE;
6911         else
6912                 data &= ~DYN_PER_CU_PG_ENABLE;
6913         if (orig != data)
6914                 WREG32(RLC_PG_CNTL, data);
6915 }
6916
6917 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6918 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6919
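/**
 * cik_init_gfx_cgpg - set up the RLC for GFX powergating
 *
 * @rdev: radeon_device pointer
 *
 * Write the clear state descriptor and the save/restore register
 * list into the RLC GPM scratch space, program the save/restore
 * and CP table base addresses, and set the idle poll count and
 * powergating delays (CIK).
 */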
6920 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6921 {
6922         u32 data, orig;
6923         u32 i;
6924
6925         if (rdev->rlc.cs_data) {
6926                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6927                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6928                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6929                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6930         } else {
6931                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6932                 for (i = 0; i < 3; i++)
6933                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6934         }
6935         if (rdev->rlc.reg_list) {
6936                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6937                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6938                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6939         }
6940
6941         orig = data = RREG32(RLC_PG_CNTL);
6942         data |= GFX_PG_SRC;
6943         if (orig != data)
6944                 WREG32(RLC_PG_CNTL, data);
6945
6946         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6947         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6948
6949         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6950         data &= ~IDLE_POLL_COUNT_MASK;
6951         data |= IDLE_POLL_COUNT(0x60);
6952         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6953
6954         data = 0x10101010;
6955         WREG32(RLC_PG_DELAY, data);
6956
6957         data = RREG32(RLC_PG_DELAY_2);
6958         data &= ~0xff;
6959         data |= 0x3;
6960         WREG32(RLC_PG_DELAY_2, data);
6961
6962         data = RREG32(RLC_AUTO_PG_CTRL);
6963         data &= ~GRBM_REG_SGIT_MASK;
6964         data |= GRBM_REG_SGIT(0x700);
6965         WREG32(RLC_AUTO_PG_CTRL, data);
6966
6967 }
6968
6969 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6970 {
6971         cik_enable_gfx_cgpg(rdev, enable);
6972         cik_enable_gfx_static_mgpg(rdev, enable);
6973         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6974 }
6975
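/**
 * cik_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Walk the clear state section list and return the number of
 * dwords needed for the clear state indirect buffer, or 0 if
 * there is no clear state data (CIK).
 */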
6976 u32 cik_get_csb_size(struct radeon_device *rdev)
6977 {
6978         u32 count = 0;
6979         const struct cs_section_def *sect = NULL;
6980         const struct cs_extent_def *ext = NULL;
6981
6982         if (rdev->rlc.cs_data == NULL)
6983                 return 0;
6984
6985         /* begin clear state */
6986         count += 2;
6987         /* context control state */
6988         count += 3;
6989
6990         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6991                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6992                         if (sect->id == SECT_CONTEXT)
6993                                 count += 2 + ext->reg_count;
6994                         else
6995                                 return 0;
6996                 }
6997         }
6998         /* pa_sc_raster_config/pa_sc_raster_config1 */
6999         count += 4;
7000         /* end clear state */
7001         count += 2;
7002         /* clear state */
7003         count += 2;
7004
7005         return count;
7006 }
7007
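/**
 * cik_get_csb_buffer - fill the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: pointer to the clear state buffer
 *
 * Emit the clear state packets (preamble, context control,
 * context registers, raster config and clear state) into
 * @buffer in little endian (CIK).
 */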
7008 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7009 {
7010         u32 count = 0, i;
7011         const struct cs_section_def *sect = NULL;
7012         const struct cs_extent_def *ext = NULL;
7013
7014         if (rdev->rlc.cs_data == NULL)
7015                 return;
7016         if (buffer == NULL)
7017                 return;
7018
7019         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7020         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7021
7022         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7023         buffer[count++] = cpu_to_le32(0x80000000);
7024         buffer[count++] = cpu_to_le32(0x80000000);
7025
7026         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7027                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7028                         if (sect->id == SECT_CONTEXT) {
7029                                 buffer[count++] =
7030                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7031                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7032                                 for (i = 0; i < ext->reg_count; i++)
7033                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7034                         } else {
7035                                 return;
7036                         }
7037                 }
7038         }
7039
7040         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7041         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7042         switch (rdev->family) {
7043         case CHIP_BONAIRE:
7044                 buffer[count++] = cpu_to_le32(0x16000012);
7045                 buffer[count++] = cpu_to_le32(0x00000000);
7046                 break;
7047         case CHIP_KAVERI:
7048                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7049                 buffer[count++] = cpu_to_le32(0x00000000);
7050                 break;
7051         case CHIP_KABINI:
7052         case CHIP_MULLINS:
7053                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7054                 buffer[count++] = cpu_to_le32(0x00000000);
7055                 break;
7056         case CHIP_HAWAII:
7057                 buffer[count++] = cpu_to_le32(0x3a00161a);
7058                 buffer[count++] = cpu_to_le32(0x0000002e);
7059                 break;
7060         default:
7061                 buffer[count++] = cpu_to_le32(0x00000000);
7062                 buffer[count++] = cpu_to_le32(0x00000000);
7063                 break;
7064         }
7065
7066         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7067         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7068
7069         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7070         buffer[count++] = cpu_to_le32(0);
7071 }
7072
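/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * Enable SCK slowdown on power up/down, set up GFX, CP and GDS
 * powergating when supported, program the always-on CU mask and
 * enable the supported GFX powergating modes (CIK).
 */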
7073 static void cik_init_pg(struct radeon_device *rdev)
7074 {
7075         if (rdev->pg_flags) {
7076                 cik_enable_sck_slowdown_on_pu(rdev, true);
7077                 cik_enable_sck_slowdown_on_pd(rdev, true);
7078                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7079                         cik_init_gfx_cgpg(rdev);
7080                         cik_enable_cp_pg(rdev, true);
7081                         cik_enable_gds_pg(rdev, true);
7082                 }
7083                 cik_init_ao_cu_mask(rdev);
7084                 cik_update_gfx_pg(rdev, true);
7085         }
7086 }
7087
7088 static void cik_fini_pg(struct radeon_device *rdev)
7089 {
7090         if (rdev->pg_flags) {
7091                 cik_update_gfx_pg(rdev, false);
7092                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7093                         cik_enable_cp_pg(rdev, false);
7094                         cik_enable_gds_pg(rdev, false);
7095                 }
7096         }
7097 }
7098
7099 /*
7100  * Interrupts
7101  * Starting with r6xx, interrupts are handled via a ring buffer.
7102  * Ring buffers are areas of GPU accessible memory that the GPU
7103  * writes interrupt vectors into and the host reads vectors out of.
7104  * There is a rptr (read pointer) that determines where the
7105  * host is currently reading, and a wptr (write pointer)
7106  * which determines where the GPU has written.  When the
7107  * pointers are equal, the ring is idle.  When the GPU
7108  * writes vectors to the ring buffer, it increments the
7109  * wptr.  When there is an interrupt, the host then starts
7110  * fetching commands and processing them until the pointers are
7111  * fetching vectors and processing them until the pointers are
7112  */
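/*
 * Illustrative sketch only (not part of the driver): the host side of the
 * scheme described above is roughly the loop below, with read_ih_wptr()
 * and handle_vector() standing in for the real helpers.  Each IH vector
 * is 16 bytes and rptr wraps via the ring's ptr_mask.
 *
 *   wptr = read_ih_wptr(rdev);                  // where the GPU last wrote
 *   while (rdev->ih.rptr != wptr) {             // ring not yet drained
 *           handle_vector(rdev, rdev->ih.rptr); // decode src_id/src_data
 *           rdev->ih.rptr = (rdev->ih.rptr + 16) & rdev->ih.ptr_mask;
 *   }
 *   WREG32(IH_RB_RPTR, rdev->ih.rptr);          // publish the new rptr
 *
 * The actual implementation is cik_irq_process() later in this file.
 */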
7113
7114 /**
7115  * cik_enable_interrupts - Enable the interrupt ring buffer
7116  *
7117  * @rdev: radeon_device pointer
7118  *
7119  * Enable the interrupt ring buffer (CIK).
7120  */
7121 static void cik_enable_interrupts(struct radeon_device *rdev)
7122 {
7123         u32 ih_cntl = RREG32(IH_CNTL);
7124         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7125
7126         ih_cntl |= ENABLE_INTR;
7127         ih_rb_cntl |= IH_RB_ENABLE;
7128         WREG32(IH_CNTL, ih_cntl);
7129         WREG32(IH_RB_CNTL, ih_rb_cntl);
7130         rdev->ih.enabled = true;
7131 }
7132
7133 /**
7134  * cik_disable_interrupts - Disable the interrupt ring buffer
7135  *
7136  * @rdev: radeon_device pointer
7137  *
7138  * Disable the interrupt ring buffer (CIK).
7139  */
7140 static void cik_disable_interrupts(struct radeon_device *rdev)
7141 {
7142         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7143         u32 ih_cntl = RREG32(IH_CNTL);
7144
7145         ih_rb_cntl &= ~IH_RB_ENABLE;
7146         ih_cntl &= ~ENABLE_INTR;
7147         WREG32(IH_RB_CNTL, ih_rb_cntl);
7148         WREG32(IH_CNTL, ih_cntl);
7149         /* set rptr, wptr to 0 */
7150         WREG32(IH_RB_RPTR, 0);
7151         WREG32(IH_RB_WPTR, 0);
7152         rdev->ih.enabled = false;
7153         rdev->ih.rptr = 0;
7154 }
7155
7156 /**
7157  * cik_disable_interrupt_state - Disable all interrupt sources
7158  *
7159  * @rdev: radeon_device pointer
7160  *
7161  * Clear all interrupt enable bits used by the driver (CIK).
7162  */
7163 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7164 {
7165         u32 tmp;
7166
7167         /* gfx ring */
7168         tmp = RREG32(CP_INT_CNTL_RING0) &
7169                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7170         WREG32(CP_INT_CNTL_RING0, tmp);
7171         /* sdma */
7172         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7173         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7174         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7175         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7176         /* compute queues */
7177         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7178         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7179         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7180         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7181         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7182         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7183         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7184         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7185         /* grbm */
7186         WREG32(GRBM_INT_CNTL, 0);
7187         /* vline/vblank, etc. */
7188         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7189         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7190         if (rdev->num_crtc >= 4) {
7191                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7192                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7193         }
7194         if (rdev->num_crtc >= 6) {
7195                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7196                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7197         }
7198         /* pflip */
7199         if (rdev->num_crtc >= 2) {
7200                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7201                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7202         }
7203         if (rdev->num_crtc >= 4) {
7204                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7205                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7206         }
7207         if (rdev->num_crtc >= 6) {
7208                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7209                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7210         }
7211
7212         /* dac hotplug */
7213         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7214
7215         /* digital hotplug */
7216         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7217         WREG32(DC_HPD1_INT_CONTROL, tmp);
7218         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7219         WREG32(DC_HPD2_INT_CONTROL, tmp);
7220         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7221         WREG32(DC_HPD3_INT_CONTROL, tmp);
7222         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7223         WREG32(DC_HPD4_INT_CONTROL, tmp);
7224         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7225         WREG32(DC_HPD5_INT_CONTROL, tmp);
7226         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7227         WREG32(DC_HPD6_INT_CONTROL, tmp);
7228
7229 }
7230
7231 /**
7232  * cik_irq_init - init and enable the interrupt ring
7233  *
7234  * @rdev: radeon_device pointer
7235  *
7236  * Allocate a ring buffer for the interrupt controller,
7237  * enable the RLC, disable interrupts, set up the IH
7238  * ring buffer and enable it (CIK).
7239  * Called at device load and resume.
7240  * Returns 0 for success, errors for failure.
7241  */
7242 static int cik_irq_init(struct radeon_device *rdev)
7243 {
7244         int ret = 0;
7245         int rb_bufsz;
7246         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7247
7248         /* allocate ring */
7249         ret = r600_ih_ring_alloc(rdev);
7250         if (ret)
7251                 return ret;
7252
7253         /* disable irqs */
7254         cik_disable_interrupts(rdev);
7255
7256         /* init rlc */
7257         ret = cik_rlc_resume(rdev);
7258         if (ret) {
7259                 r600_ih_ring_fini(rdev);
7260                 return ret;
7261         }
7262
7263         /* setup interrupt control */
7264         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7265         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7266         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7267         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7268          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7269          */
7270         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7271         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7272         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7273         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7274
7275         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7276         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7277
7278         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7279                       IH_WPTR_OVERFLOW_CLEAR |
7280                       (rb_bufsz << 1));
7281
7282         if (rdev->wb.enabled)
7283                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7284
7285         /* set the writeback address whether it's enabled or not */
7286         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7287         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7288
7289         WREG32(IH_RB_CNTL, ih_rb_cntl);
7290
7291         /* set rptr, wptr to 0 */
7292         WREG32(IH_RB_RPTR, 0);
7293         WREG32(IH_RB_WPTR, 0);
7294
7295         /* Default settings for IH_CNTL (disabled at first) */
7296         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7297         /* RPTR_REARM only works if msi's are enabled */
7298         if (rdev->msi_enabled)
7299                 ih_cntl |= RPTR_REARM;
7300         WREG32(IH_CNTL, ih_cntl);
7301
7302         /* force the active interrupt state to all disabled */
7303         cik_disable_interrupt_state(rdev);
7304
7305         pci_set_master(rdev->pdev);
7306
7307         /* enable irqs */
7308         cik_enable_interrupts(rdev);
7309
7310         return ret;
7311 }
7312
7313 /**
7314  * cik_irq_set - enable/disable interrupt sources
7315  *
7316  * @rdev: radeon_device pointer
7317  *
7318  * Enable interrupt sources on the GPU (vblanks, hpd,
7319  * etc.) (CIK).
7320  * Returns 0 for success, errors for failure.
7321  */
7322 int cik_irq_set(struct radeon_device *rdev)
7323 {
7324         u32 cp_int_cntl;
7325         u32 cp_m1p0;
7326         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7327         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7328         u32 grbm_int_cntl = 0;
7329         u32 dma_cntl, dma_cntl1;
7330         u32 thermal_int;
7331
7332         if (!rdev->irq.installed) {
7333                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7334                 return -EINVAL;
7335         }
7336         /* don't enable anything if the ih is disabled */
7337         if (!rdev->ih.enabled) {
7338                 cik_disable_interrupts(rdev);
7339                 /* force the active interrupt state to all disabled */
7340                 cik_disable_interrupt_state(rdev);
7341                 return 0;
7342         }
7343
7344         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7345                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7346         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7347
7348         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7349         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7350         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7351         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7352         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7353         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7354
7355         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7356         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7357
7358         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7359
7360         if (rdev->flags & RADEON_IS_IGP)
7361                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7362                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
7363         else
7364                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7365                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7366
7367         /* enable CP interrupts on all rings */
7368         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7369                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7370                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7371         }
7372         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7373                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7374                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7375                 if (ring->me == 1) {
7376                         switch (ring->pipe) {
7377                         case 0:
7378                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7379                                 break;
7380                         default:
7381                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7382                                 break;
7383                         }
7384                 } else {
7385                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7386                 }
7387         }
7388         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7389                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7390                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7391                 if (ring->me == 1) {
7392                         switch (ring->pipe) {
7393                         case 0:
7394                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7395                                 break;
7396                         default:
7397                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7398                                 break;
7399                         }
7400                 } else {
7401                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7402                 }
7403         }
7404
7405         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7406                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7407                 dma_cntl |= TRAP_ENABLE;
7408         }
7409
7410         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7411                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7412                 dma_cntl1 |= TRAP_ENABLE;
7413         }
7414
7415         if (rdev->irq.crtc_vblank_int[0] ||
7416             atomic_read(&rdev->irq.pflip[0])) {
7417                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7418                 crtc1 |= VBLANK_INTERRUPT_MASK;
7419         }
7420         if (rdev->irq.crtc_vblank_int[1] ||
7421             atomic_read(&rdev->irq.pflip[1])) {
7422                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7423                 crtc2 |= VBLANK_INTERRUPT_MASK;
7424         }
7425         if (rdev->irq.crtc_vblank_int[2] ||
7426             atomic_read(&rdev->irq.pflip[2])) {
7427                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7428                 crtc3 |= VBLANK_INTERRUPT_MASK;
7429         }
7430         if (rdev->irq.crtc_vblank_int[3] ||
7431             atomic_read(&rdev->irq.pflip[3])) {
7432                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7433                 crtc4 |= VBLANK_INTERRUPT_MASK;
7434         }
7435         if (rdev->irq.crtc_vblank_int[4] ||
7436             atomic_read(&rdev->irq.pflip[4])) {
7437                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7438                 crtc5 |= VBLANK_INTERRUPT_MASK;
7439         }
7440         if (rdev->irq.crtc_vblank_int[5] ||
7441             atomic_read(&rdev->irq.pflip[5])) {
7442                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7443                 crtc6 |= VBLANK_INTERRUPT_MASK;
7444         }
7445         if (rdev->irq.hpd[0]) {
7446                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7447                 hpd1 |= DC_HPDx_INT_EN;
7448         }
7449         if (rdev->irq.hpd[1]) {
7450                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7451                 hpd2 |= DC_HPDx_INT_EN;
7452         }
7453         if (rdev->irq.hpd[2]) {
7454                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7455                 hpd3 |= DC_HPDx_INT_EN;
7456         }
7457         if (rdev->irq.hpd[3]) {
7458                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7459                 hpd4 |= DC_HPDx_INT_EN;
7460         }
7461         if (rdev->irq.hpd[4]) {
7462                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7463                 hpd5 |= DC_HPDx_INT_EN;
7464         }
7465         if (rdev->irq.hpd[5]) {
7466                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7467                 hpd6 |= DC_HPDx_INT_EN;
7468         }
7469
7470         if (rdev->irq.dpm_thermal) {
7471                 DRM_DEBUG("dpm thermal\n");
7472                 if (rdev->flags & RADEON_IS_IGP)
7473                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7474                 else
7475                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7476         }
7477
7478         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7479
7480         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7481         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7482
7483         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7484
7485         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7486
7487         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7488         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7489         if (rdev->num_crtc >= 4) {
7490                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7491                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7492         }
7493         if (rdev->num_crtc >= 6) {
7494                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7495                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7496         }
7497
7498         if (rdev->num_crtc >= 2) {
7499                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7500                        GRPH_PFLIP_INT_MASK);
7501                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7502                        GRPH_PFLIP_INT_MASK);
7503         }
7504         if (rdev->num_crtc >= 4) {
7505                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7506                        GRPH_PFLIP_INT_MASK);
7507                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7508                        GRPH_PFLIP_INT_MASK);
7509         }
7510         if (rdev->num_crtc >= 6) {
7511                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7512                        GRPH_PFLIP_INT_MASK);
7513                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7514                        GRPH_PFLIP_INT_MASK);
7515         }
7516
7517         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7518         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7519         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7520         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7521         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7522         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7523
7524         if (rdev->flags & RADEON_IS_IGP)
7525                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7526         else
7527                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7528
7529         return 0;
7530 }
7531
7532 /**
7533  * cik_irq_ack - ack interrupt sources
7534  *
7535  * @rdev: radeon_device pointer
7536  *
7537  * Ack interrupt sources on the GPU (vblanks, hpd,
7538  * etc.) (CIK).  Certain interrupt sources are sw
7539  * generated and do not require an explicit ack.
7540  */
7541 static inline void cik_irq_ack(struct radeon_device *rdev)
7542 {
7543         u32 tmp;
7544
7545         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7546         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7547         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7548         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7549         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7550         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7551         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7552
7553         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7554                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7555         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7556                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7557         if (rdev->num_crtc >= 4) {
7558                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7559                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7560                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7561                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7562         }
7563         if (rdev->num_crtc >= 6) {
7564                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7565                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7566                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7567                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7568         }
7569
7570         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7571                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7572                        GRPH_PFLIP_INT_CLEAR);
7573         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7574                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7575                        GRPH_PFLIP_INT_CLEAR);
7576         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7577                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7578         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7579                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7580         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7581                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7582         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7583                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7584
7585         if (rdev->num_crtc >= 4) {
7586                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7587                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7588                                GRPH_PFLIP_INT_CLEAR);
7589                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7590                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7591                                GRPH_PFLIP_INT_CLEAR);
7592                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7593                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7594                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7595                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7596                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7597                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7598                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7599                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7600         }
7601
7602         if (rdev->num_crtc >= 6) {
7603                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7604                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7605                                GRPH_PFLIP_INT_CLEAR);
7606                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7607                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7608                                GRPH_PFLIP_INT_CLEAR);
7609                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7610                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7611                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7612                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7613                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7614                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7615                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7616                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7617         }
7618
7619         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7620                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7621                 tmp |= DC_HPDx_INT_ACK;
7622                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7623         }
7624         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7625                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7626                 tmp |= DC_HPDx_INT_ACK;
7627                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7628         }
7629         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7630                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7631                 tmp |= DC_HPDx_INT_ACK;
7632                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7633         }
7634         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7635                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7636                 tmp |= DC_HPDx_INT_ACK;
7637                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7638         }
7639         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7640                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7641                 tmp |= DC_HPDx_INT_ACK;
7642                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7643         }
7644         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7645                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7646                 tmp |= DC_HPDx_INT_ACK;
7647                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7648         }
7649 }
7650
7651 /**
7652  * cik_irq_disable - disable interrupts
7653  *
7654  * @rdev: radeon_device pointer
7655  *
7656  * Disable interrupts on the hw (CIK).
7657  */
7658 static void cik_irq_disable(struct radeon_device *rdev)
7659 {
7660         cik_disable_interrupts(rdev);
7661         /* Wait and acknowledge irq */
7662         mdelay(1);
7663         cik_irq_ack(rdev);
7664         cik_disable_interrupt_state(rdev);
7665 }
7666
7667 /**
7668  * cik_irq_suspend - disable interrupts for suspend
7669  *
7670  * @rdev: radeon_device pointer
7671  *
7672  * Disable interrupts and stop the RLC (CIK).
7673  * Used for suspend.
7674  */
7675 static void cik_irq_suspend(struct radeon_device *rdev)
7676 {
7677         cik_irq_disable(rdev);
7678         cik_rlc_stop(rdev);
7679 }
7680
7681 /**
7682  * cik_irq_fini - tear down interrupt support
7683  *
7684  * @rdev: radeon_device pointer
7685  *
7686  * Disable interrupts on the hw and free the IH ring
7687  * buffer (CIK).
7688  * Used for driver unload.
7689  */
7690 static void cik_irq_fini(struct radeon_device *rdev)
7691 {
7692         cik_irq_suspend(rdev);
7693         r600_ih_ring_fini(rdev);
7694 }
7695
7696 /**
7697  * cik_get_ih_wptr - get the IH ring buffer wptr
7698  *
7699  * @rdev: radeon_device pointer
7700  *
7701  * Get the IH ring buffer wptr from either the register
7702  * or the writeback memory buffer (CIK).  Also check for
7703  * ring buffer overflow and deal with it.
7704  * Used by cik_irq_process().
7705  * Returns the value of the wptr.
7706  */
7707 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7708 {
7709         u32 wptr, tmp;
7710
7711         if (rdev->wb.enabled)
7712                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7713         else
7714                 wptr = RREG32(IH_RB_WPTR);
7715
7716         if (wptr & RB_OVERFLOW) {
7717                 wptr &= ~RB_OVERFLOW;
7718                 /* When a ring buffer overflow happens, start parsing interrupts
7719                  * from the last vector that was not overwritten (wptr + 16).
7720                  * Hopefully this should allow us to catch up.
7721                  */
7722                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7723                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7724                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7725                 tmp = RREG32(IH_RB_CNTL);
7726                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7727                 WREG32(IH_RB_CNTL, tmp);
7728         }
7729         return (wptr & rdev->ih.ptr_mask);
7730 }
7731
7732 /*        CIK IV Ring
7733  * Each IV ring entry is 128 bits:
7734  * [7:0]    - interrupt source id
7735  * [31:8]   - reserved
7736  * [59:32]  - interrupt source data
7737  * [63:60]  - reserved
7738  * [71:64]  - RINGID
7739  *            CP:
7740  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7741  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7742  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7743  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7744  *            PIPE_ID - ME0 0=3D
7745  *                    - ME1&2 compute dispatcher (4 pipes each)
7746  *            SDMA:
7747  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7748  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7749  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7750  * [79:72]  - VMID
7751  * [95:80]  - PASID
7752  * [127:96] - reserved
7753  */
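/* In cik_irq_process() below each 16-byte IV entry is consumed as four
 * little-endian dwords: dword0[7:0] is the source id, dword1[27:0] the
 * source data and dword2[7:0] the RINGID; since rptr/wptr are byte
 * offsets, the read pointer advances by 16 per entry.
 */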
7754 /**
7755  * cik_irq_process - interrupt handler
7756  *
7757  * @rdev: radeon_device pointer
7758  *
7759  * Interrupt handler (CIK).  Walk the IH ring,
7760  * ack interrupts and schedule work to handle
7761  * interrupt events.
7762  * Returns irq process return code.
7763  */
7764 int cik_irq_process(struct radeon_device *rdev)
7765 {
7766         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7767         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7768         u32 wptr;
7769         u32 rptr;
7770         u32 src_id, src_data, ring_id;
7771         u8 me_id, pipe_id, queue_id;
7772         u32 ring_index;
7773         bool queue_hotplug = false;
7774         bool queue_reset = false;
7775         u32 addr, status, mc_client;
7776         bool queue_thermal = false;
7777
7778         if (!rdev->ih.enabled || rdev->shutdown)
7779                 return IRQ_NONE;
7780
7781         wptr = cik_get_ih_wptr(rdev);
7782
7783 restart_ih:
7784         /* is somebody else already processing irqs? */
7785         if (atomic_xchg(&rdev->ih.lock, 1))
7786                 return IRQ_NONE;
7787
7788         rptr = rdev->ih.rptr;
7789         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7790
7791         /* Order reading of wptr vs. reading of IH ring data */
7792         rmb();
7793
7794         /* display interrupts */
7795         cik_irq_ack(rdev);
7796
7797         while (rptr != wptr) {
7798                 /* wptr/rptr are in bytes! */
7799                 ring_index = rptr / 4;
7800
7801                 radeon_kfd_interrupt(rdev,
7802                                 (const void *) &rdev->ih.ring[ring_index]);
7803
7804                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7805                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7806                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7807
7808                 switch (src_id) {
7809                 case 1: /* D1 vblank/vline */
7810                         switch (src_data) {
7811                         case 0: /* D1 vblank */
7812                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7813                                         if (rdev->irq.crtc_vblank_int[0]) {
7814                                                 drm_handle_vblank(rdev->ddev, 0);
7815                                                 rdev->pm.vblank_sync = true;
7816                                                 wake_up(&rdev->irq.vblank_queue);
7817                                         }
7818                                         if (atomic_read(&rdev->irq.pflip[0]))
7819                                                 radeon_crtc_handle_vblank(rdev, 0);
7820                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7821                                         DRM_DEBUG("IH: D1 vblank\n");
7822                                 }
7823                                 break;
7824                         case 1: /* D1 vline */
7825                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7826                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7827                                         DRM_DEBUG("IH: D1 vline\n");
7828                                 }
7829                                 break;
7830                         default:
7831                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7832                                 break;
7833                         }
7834                         break;
7835                 case 2: /* D2 vblank/vline */
7836                         switch (src_data) {
7837                         case 0: /* D2 vblank */
7838                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7839                                         if (rdev->irq.crtc_vblank_int[1]) {
7840                                                 drm_handle_vblank(rdev->ddev, 1);
7841                                                 rdev->pm.vblank_sync = true;
7842                                                 wake_up(&rdev->irq.vblank_queue);
7843                                         }
7844                                         if (atomic_read(&rdev->irq.pflip[1]))
7845                                                 radeon_crtc_handle_vblank(rdev, 1);
7846                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7847                                         DRM_DEBUG("IH: D2 vblank\n");
7848                                 }
7849                                 break;
7850                         case 1: /* D2 vline */
7851                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7852                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7853                                         DRM_DEBUG("IH: D2 vline\n");
7854                                 }
7855                                 break;
7856                         default:
7857                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7858                                 break;
7859                         }
7860                         break;
7861                 case 3: /* D3 vblank/vline */
7862                         switch (src_data) {
7863                         case 0: /* D3 vblank */
7864                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7865                                         if (rdev->irq.crtc_vblank_int[2]) {
7866                                                 drm_handle_vblank(rdev->ddev, 2);
7867                                                 rdev->pm.vblank_sync = true;
7868                                                 wake_up(&rdev->irq.vblank_queue);
7869                                         }
7870                                         if (atomic_read(&rdev->irq.pflip[2]))
7871                                                 radeon_crtc_handle_vblank(rdev, 2);
7872                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7873                                         DRM_DEBUG("IH: D3 vblank\n");
7874                                 }
7875                                 break;
7876                         case 1: /* D3 vline */
7877                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7878                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7879                                         DRM_DEBUG("IH: D3 vline\n");
7880                                 }
7881                                 break;
7882                         default:
7883                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7884                                 break;
7885                         }
7886                         break;
7887                 case 4: /* D4 vblank/vline */
7888                         switch (src_data) {
7889                         case 0: /* D4 vblank */
7890                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7891                                         if (rdev->irq.crtc_vblank_int[3]) {
7892                                                 drm_handle_vblank(rdev->ddev, 3);
7893                                                 rdev->pm.vblank_sync = true;
7894                                                 wake_up(&rdev->irq.vblank_queue);
7895                                         }
7896                                         if (atomic_read(&rdev->irq.pflip[3]))
7897                                                 radeon_crtc_handle_vblank(rdev, 3);
7898                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7899                                         DRM_DEBUG("IH: D4 vblank\n");
7900                                 }
7901                                 break;
7902                         case 1: /* D4 vline */
7903                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7904                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7905                                         DRM_DEBUG("IH: D4 vline\n");
7906                                 }
7907                                 break;
7908                         default:
7909                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7910                                 break;
7911                         }
7912                         break;
7913                 case 5: /* D5 vblank/vline */
7914                         switch (src_data) {
7915                         case 0: /* D5 vblank */
7916                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7917                                         if (rdev->irq.crtc_vblank_int[4]) {
7918                                                 drm_handle_vblank(rdev->ddev, 4);
7919                                                 rdev->pm.vblank_sync = true;
7920                                                 wake_up(&rdev->irq.vblank_queue);
7921                                         }
7922                                         if (atomic_read(&rdev->irq.pflip[4]))
7923                                                 radeon_crtc_handle_vblank(rdev, 4);
7924                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7925                                         DRM_DEBUG("IH: D5 vblank\n");
7926                                 }
7927                                 break;
7928                         case 1: /* D5 vline */
7929                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7930                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7931                                         DRM_DEBUG("IH: D5 vline\n");
7932                                 }
7933                                 break;
7934                         default:
7935                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7936                                 break;
7937                         }
7938                         break;
7939                 case 6: /* D6 vblank/vline */
7940                         switch (src_data) {
7941                         case 0: /* D6 vblank */
7942                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7943                                         if (rdev->irq.crtc_vblank_int[5]) {
7944                                                 drm_handle_vblank(rdev->ddev, 5);
7945                                                 rdev->pm.vblank_sync = true;
7946                                                 wake_up(&rdev->irq.vblank_queue);
7947                                         }
7948                                         if (atomic_read(&rdev->irq.pflip[5]))
7949                                                 radeon_crtc_handle_vblank(rdev, 5);
7950                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7951                                         DRM_DEBUG("IH: D6 vblank\n");
7952                                 }
7953                                 break;
7954                         case 1: /* D6 vline */
7955                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7956                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7957                                         DRM_DEBUG("IH: D6 vline\n");
7958                                 }
7959                                 break;
7960                         default:
7961                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7962                                 break;
7963                         }
7964                         break;
7965                 case 8: /* D1 page flip */
7966                 case 10: /* D2 page flip */
7967                 case 12: /* D3 page flip */
7968                 case 14: /* D4 page flip */
7969                 case 16: /* D5 page flip */
7970                 case 18: /* D6 page flip */
7971                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7972                         if (radeon_use_pflipirq > 0)
7973                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7974                         break;
7975                 case 42: /* HPD hotplug */
7976                         switch (src_data) {
7977                         case 0:
7978                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7979                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7980                                         queue_hotplug = true;
7981                                         DRM_DEBUG("IH: HPD1\n");
7982                                 }
7983                                 break;
7984                         case 1:
7985                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7986                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7987                                         queue_hotplug = true;
7988                                         DRM_DEBUG("IH: HPD2\n");
7989                                 }
7990                                 break;
7991                         case 2:
7992                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7993                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7994                                         queue_hotplug = true;
7995                                         DRM_DEBUG("IH: HPD3\n");
7996                                 }
7997                                 break;
7998                         case 3:
7999                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8000                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8001                                         queue_hotplug = true;
8002                                         DRM_DEBUG("IH: HPD4\n");
8003                                 }
8004                                 break;
8005                         case 4:
8006                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8007                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8008                                         queue_hotplug = true;
8009                                         DRM_DEBUG("IH: HPD5\n");
8010                                 }
8011                                 break;
8012                         case 5:
8013                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8014                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8015                                         queue_hotplug = true;
8016                                         DRM_DEBUG("IH: HPD6\n");
8017                                 }
8018                                 break;
8019                         default:
8020                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8021                                 break;
8022                         }
8023                         break;
8024                 case 124: /* UVD */
8025                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8026                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8027                         break;
8028                 case 146:
8029                 case 147:
8030                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8031                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8032                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8033                         /* reset addr and status */
8034                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8035                         if (addr == 0x0 && status == 0x0)
8036                                 break;
8037                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8038                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8039                                 addr);
8040                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8041                                 status);
8042                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8043                         break;
8044                 case 167: /* VCE */
8045                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8046                         switch (src_data) {
8047                         case 0:
8048                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8049                                 break;
8050                         case 1:
8051                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8052                                 break;
8053                         default:
8054                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8055                                 break;
8056                         }
8057                         break;
8058                 case 176: /* GFX RB CP_INT */
8059                 case 177: /* GFX IB CP_INT */
8060                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8061                         break;
8062                 case 181: /* CP EOP event */
8063                         DRM_DEBUG("IH: CP EOP\n");
8064                         /* XXX check the bitfield order! */
8065                         me_id = (ring_id & 0x60) >> 5;
8066                         pipe_id = (ring_id & 0x18) >> 3;
8067                         queue_id = (ring_id & 0x7) >> 0;
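                        /* per the IV ring layout above: ring_id[6:5] = ME_ID,
                         * ring_id[4:3] = PIPE_ID, ring_id[2:0] = QUEUE_ID */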
8068                         switch (me_id) {
8069                         case 0:
8070                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8071                                 break;
8072                         case 1:
8073                         case 2:
8074                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8075                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8076                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8077                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8078                                 break;
8079                         }
8080                         break;
8081                 case 184: /* CP Privileged reg access */
8082                         DRM_ERROR("Illegal register access in command stream\n");
8083                         /* XXX check the bitfield order! */
8084                         me_id = (ring_id & 0x60) >> 5;
8085                         pipe_id = (ring_id & 0x18) >> 3;
8086                         queue_id = (ring_id & 0x7) >> 0;
8087                         switch (me_id) {
8088                         case 0:
8089                                 /* This results in a full GPU reset, but all we need to do is soft
8090                                  * reset the CP for gfx
8091                                  */
8092                                 queue_reset = true;
8093                                 break;
8094                         case 1:
8095                                 /* XXX compute */
8096                                 queue_reset = true;
8097                                 break;
8098                         case 2:
8099                                 /* XXX compute */
8100                                 queue_reset = true;
8101                                 break;
8102                         }
8103                         break;
8104                 case 185: /* CP Privileged inst */
8105                         DRM_ERROR("Illegal instruction in command stream\n");
8106                         /* XXX check the bitfield order! */
8107                         me_id = (ring_id & 0x60) >> 5;
8108                         pipe_id = (ring_id & 0x18) >> 3;
8109                         queue_id = (ring_id & 0x7) >> 0;
8110                         switch (me_id) {
8111                         case 0:
8112                                 /* This results in a full GPU reset, but all we need to do is soft
8113                                  * reset the CP for gfx
8114                                  */
8115                                 queue_reset = true;
8116                                 break;
8117                         case 1:
8118                                 /* XXX compute */
8119                                 queue_reset = true;
8120                                 break;
8121                         case 2:
8122                                 /* XXX compute */
8123                                 queue_reset = true;
8124                                 break;
8125                         }
8126                         break;
8127                 case 224: /* SDMA trap event */
8128                         /* XXX check the bitfield order! */
8129                         me_id = (ring_id & 0x3) >> 0;
8130                         queue_id = (ring_id & 0xc) >> 2;
8131                         DRM_DEBUG("IH: SDMA trap\n");
8132                         switch (me_id) {
8133                         case 0:
8134                                 switch (queue_id) {
8135                                 case 0:
8136                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8137                                         break;
8138                                 case 1:
8139                                         /* XXX compute */
8140                                         break;
8141                                 case 2:
8142                                         /* XXX compute */
8143                                         break;
8144                                 }
8145                                 break;
8146                         case 1:
8147                                 switch (queue_id) {
8148                                 case 0:
8149                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8150                                         break;
8151                                 case 1:
8152                                         /* XXX compute */
8153                                         break;
8154                                 case 2:
8155                                         /* XXX compute */
8156                                         break;
8157                                 }
8158                                 break;
8159                         }
8160                         break;
8161                 case 230: /* thermal low to high */
8162                         DRM_DEBUG("IH: thermal low to high\n");
8163                         rdev->pm.dpm.thermal.high_to_low = false;
8164                         queue_thermal = true;
8165                         break;
8166                 case 231: /* thermal high to low */
8167                         DRM_DEBUG("IH: thermal high to low\n");
8168                         rdev->pm.dpm.thermal.high_to_low = true;
8169                         queue_thermal = true;
8170                         break;
8171                 case 233: /* GUI IDLE */
8172                         DRM_DEBUG("IH: GUI idle\n");
8173                         break;
8174                 case 241: /* SDMA Privileged inst */
8175                 case 247: /* SDMA Privileged inst */
8176                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8177                         /* XXX check the bitfield order! */
8178                         me_id = (ring_id & 0x3) >> 0;
8179                         queue_id = (ring_id & 0xc) >> 2;
8180                         switch (me_id) {
8181                         case 0:
8182                                 switch (queue_id) {
8183                                 case 0:
8184                                         queue_reset = true;
8185                                         break;
8186                                 case 1:
8187                                         /* XXX compute */
8188                                         queue_reset = true;
8189                                         break;
8190                                 case 2:
8191                                         /* XXX compute */
8192                                         queue_reset = true;
8193                                         break;
8194                                 }
8195                                 break;
8196                         case 1:
8197                                 switch (queue_id) {
8198                                 case 0:
8199                                         queue_reset = true;
8200                                         break;
8201                                 case 1:
8202                                         /* XXX compute */
8203                                         queue_reset = true;
8204                                         break;
8205                                 case 2:
8206                                         /* XXX compute */
8207                                         queue_reset = true;
8208                                         break;
8209                                 }
8210                                 break;
8211                         }
8212                         break;
8213                 default:
8214                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8215                         break;
8216                 }
8217
8218                 /* wptr/rptr are in bytes! */
8219                 rptr += 16;
8220                 rptr &= rdev->ih.ptr_mask;
8221                 WREG32(IH_RB_RPTR, rptr);
8222         }
8223         if (queue_hotplug)
8224                 schedule_work(&rdev->hotplug_work);
8225         if (queue_reset) {
8226                 rdev->needs_reset = true;
8227                 wake_up_all(&rdev->fence_queue);
8228         }
8229         if (queue_thermal)
8230                 schedule_work(&rdev->pm.dpm.thermal.work);
8231         rdev->ih.rptr = rptr;
8232         atomic_set(&rdev->ih.lock, 0);
8233
8234         /* make sure wptr hasn't changed while processing */
8235         wptr = cik_get_ih_wptr(rdev);
8236         if (wptr != rptr)
8237                 goto restart_ih;
8238
8239         return IRQ_HANDLED;
8240 }
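/* Processing flow above: take the ih lock, ack the display interrupt
 * status registers, walk IV entries from rptr to wptr dispatching on
 * src_id, then re-read wptr and restart if new entries arrived while
 * the ring was being drained.
 */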
8241
8242 /*
8243  * startup/shutdown callbacks
8244  */
8245 /**
8246  * cik_startup - program the asic to a functional state
8247  *
8248  * @rdev: radeon_device pointer
8249  *
8250  * Programs the asic to a functional state (CIK).
8251  * Called by cik_init() and cik_resume().
8252  * Returns 0 for success, error for failure.
8253  */
8254 static int cik_startup(struct radeon_device *rdev)
8255 {
8256         struct radeon_ring *ring;
8257         u32 nop;
8258         int r;
8259
8260         /* enable pcie gen2/3 link */
8261         cik_pcie_gen3_enable(rdev);
8262         /* enable aspm */
8263         cik_program_aspm(rdev);
8264
8265         /* scratch needs to be initialized before MC */
8266         r = r600_vram_scratch_init(rdev);
8267         if (r)
8268                 return r;
8269
8270         cik_mc_program(rdev);
8271
8272         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8273                 r = ci_mc_load_microcode(rdev);
8274                 if (r) {
8275                         DRM_ERROR("Failed to load MC firmware!\n");
8276                         return r;
8277                 }
8278         }
8279
8280         r = cik_pcie_gart_enable(rdev);
8281         if (r)
8282                 return r;
8283         cik_gpu_init(rdev);
8284
8285         /* allocate rlc buffers */
8286         if (rdev->flags & RADEON_IS_IGP) {
8287                 if (rdev->family == CHIP_KAVERI) {
8288                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8289                         rdev->rlc.reg_list_size =
8290                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8291                 } else {
8292                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8293                         rdev->rlc.reg_list_size =
8294                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8295                 }
8296         }
8297         rdev->rlc.cs_data = ci_cs_data;
8298         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8299         r = sumo_rlc_init(rdev);
8300         if (r) {
8301                 DRM_ERROR("Failed to init rlc BOs!\n");
8302                 return r;
8303         }
8304
8305         /* allocate wb buffer */
8306         r = radeon_wb_init(rdev);
8307         if (r)
8308                 return r;
8309
8310         /* allocate mec buffers */
8311         r = cik_mec_init(rdev);
8312         if (r) {
8313                 DRM_ERROR("Failed to init MEC BOs!\n");
8314                 return r;
8315         }
8316
8317         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8318         if (r) {
8319                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8320                 return r;
8321         }
8322
8323         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8324         if (r) {
8325                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8326                 return r;
8327         }
8328
8329         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8330         if (r) {
8331                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8332                 return r;
8333         }
8334
8335         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8336         if (r) {
8337                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8338                 return r;
8339         }
8340
8341         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8342         if (r) {
8343                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8344                 return r;
8345         }
8346
8347         r = radeon_uvd_resume(rdev);
8348         if (!r) {
8349                 r = uvd_v4_2_resume(rdev);
8350                 if (!r) {
8351                         r = radeon_fence_driver_start_ring(rdev,
8352                                                            R600_RING_TYPE_UVD_INDEX);
8353                         if (r)
8354                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8355                 }
8356         }
8357         if (r)
8358                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8359
8360         r = radeon_vce_resume(rdev);
8361         if (!r) {
8362                 r = vce_v2_0_resume(rdev);
8363                 if (!r)
8364                         r = radeon_fence_driver_start_ring(rdev,
8365                                                            TN_RING_TYPE_VCE1_INDEX);
8366                 if (!r)
8367                         r = radeon_fence_driver_start_ring(rdev,
8368                                                            TN_RING_TYPE_VCE2_INDEX);
8369         }
8370         if (r) {
8371                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8372                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8373                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8374         }
8375
8376         /* Enable IRQ */
8377         if (!rdev->irq.installed) {
8378                 r = radeon_irq_kms_init(rdev);
8379                 if (r)
8380                         return r;
8381         }
8382
8383         r = cik_irq_init(rdev);
8384         if (r) {
8385                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8386                 radeon_irq_kms_fini(rdev);
8387                 return r;
8388         }
8389         cik_irq_set(rdev);
8390
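        /* Choose the NOP used to pad ring submissions: a type-3 NOP where
         * the firmware supports it; the new_fw check below suggests older
         * Hawaii firmware apparently does not, so those boards fall back to
         * the type-2 RADEON_CP_PACKET2 filler.
         */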
8391         if (rdev->family == CHIP_HAWAII) {
8392                 if (rdev->new_fw)
8393                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8394                 else
8395                         nop = RADEON_CP_PACKET2;
8396         } else {
8397                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8398         }
8399
8400         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8401         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8402                              nop);
8403         if (r)
8404                 return r;
8405
8406         /* set up the compute queues */
8407         /* type-2 packets are deprecated on MEC, use type-3 instead */
8408         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8409         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8410                              nop);
8411         if (r)
8412                 return r;
8413         ring->me = 1; /* first MEC */
8414         ring->pipe = 0; /* first pipe */
8415         ring->queue = 0; /* first queue */
8416         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8417
8418         /* type-2 packets are deprecated on MEC, use type-3 instead */
8419         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8420         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8421                              nop);
8422         if (r)
8423                 return r;
8424         /* dGPUs only have 1 MEC */
8425         ring->me = 1; /* first MEC */
8426         ring->pipe = 0; /* first pipe */
8427         ring->queue = 1; /* second queue */
8428         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8429
8430         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8431         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8432                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8433         if (r)
8434                 return r;
8435
8436         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8437         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8438                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8439         if (r)
8440                 return r;
8441
8442         r = cik_cp_resume(rdev);
8443         if (r)
8444                 return r;
8445
8446         r = cik_sdma_resume(rdev);
8447         if (r)
8448                 return r;
8449
8450         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8451         if (ring->ring_size) {
8452                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8453                                      RADEON_CP_PACKET2);
8454                 if (!r)
8455                         r = uvd_v1_0_init(rdev);
8456                 if (r)
8457                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8458         }
8459
8460         r = -ENOENT;
8461
8462         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8463         if (ring->ring_size)
8464                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8465                                      VCE_CMD_NO_OP);
8466
8467         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8468         if (ring->ring_size)
8469                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8470                                      VCE_CMD_NO_OP);
8471
8472         if (!r)
8473                 r = vce_v1_0_init(rdev);
8474         else if (r != -ENOENT)
8475                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8476
8477         r = radeon_ib_pool_init(rdev);
8478         if (r) {
8479                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8480                 return r;
8481         }
8482
8483         r = radeon_vm_manager_init(rdev);
8484         if (r) {
8485                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8486                 return r;
8487         }
8488
8489         r = dce6_audio_init(rdev);
8490         if (r)
8491                 return r;
8492
8493         r = radeon_kfd_resume(rdev);
8494         if (r)
8495                 return r;
8496
8497         return 0;
8498 }
8499
8500 /**
8501  * cik_resume - resume the asic to a functional state
8502  *
8503  * @rdev: radeon_device pointer
8504  *
8505  * Programs the asic to a functional state (CIK).
8506  * Called at resume.
8507  * Returns 0 for success, error for failure.
8508  */
8509 int cik_resume(struct radeon_device *rdev)
8510 {
8511         int r;
8512
8513         /* post card */
8514         atom_asic_init(rdev->mode_info.atom_context);
8515
8516         /* init golden registers */
8517         cik_init_golden_registers(rdev);
8518
8519         if (rdev->pm.pm_method == PM_METHOD_DPM)
8520                 radeon_pm_resume(rdev);
8521
8522         rdev->accel_working = true;
8523         r = cik_startup(rdev);
8524         if (r) {
8525                 DRM_ERROR("cik startup failed on resume\n");
8526                 rdev->accel_working = false;
8527                 return r;
8528         }
8529
8530         return r;
8531
8532 }
8533
8534 /**
8535  * cik_suspend - suspend the asic
8536  *
8537  * @rdev: radeon_device pointer
8538  *
8539  * Bring the chip into a state suitable for suspend (CIK).
8540  * Called at suspend.
8541  * Returns 0 for success.
8542  */
8543 int cik_suspend(struct radeon_device *rdev)
8544 {
8545         radeon_kfd_suspend(rdev);
8546         radeon_pm_suspend(rdev);
8547         dce6_audio_fini(rdev);
8548         radeon_vm_manager_fini(rdev);
8549         cik_cp_enable(rdev, false);
8550         cik_sdma_enable(rdev, false);
8551         uvd_v1_0_fini(rdev);
8552         radeon_uvd_suspend(rdev);
8553         radeon_vce_suspend(rdev);
8554         cik_fini_pg(rdev);
8555         cik_fini_cg(rdev);
8556         cik_irq_suspend(rdev);
8557         radeon_wb_disable(rdev);
8558         cik_pcie_gart_disable(rdev);
8559         return 0;
8560 }
8561
8562 /* The plan is to move initialization into this function and use
8563  * helper functions so that radeon_device_init does little more
8564  * than call asic-specific functions. This should also allow us
8565  * to remove a bunch of callbacks,
8566  * like vram_info.
8567  */
8568 /**
8569  * cik_init - asic specific driver and hw init
8570  *
8571  * @rdev: radeon_device pointer
8572  *
8573  * Setup asic specific driver variables and program the hw
8574  * to a functional state (CIK).
8575  * Called at driver startup.
8576  * Returns 0 for success, errors for failure.
8577  */
8578 int cik_init(struct radeon_device *rdev)
8579 {
8580         struct radeon_ring *ring;
8581         int r;
8582
8583         /* Read BIOS */
8584         if (!radeon_get_bios(rdev)) {
8585                 if (ASIC_IS_AVIVO(rdev))
8586                         return -EINVAL;
8587         }
8588         /* Must be an ATOMBIOS */
8589         if (!rdev->is_atom_bios) {
8590                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8591                 return -EINVAL;
8592         }
8593         r = radeon_atombios_init(rdev);
8594         if (r)
8595                 return r;
8596
8597         /* Post card if necessary */
8598         if (!radeon_card_posted(rdev)) {
8599                 if (!rdev->bios) {
8600                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8601                         return -EINVAL;
8602                 }
8603                 DRM_INFO("GPU not posted. posting now...\n");
8604                 atom_asic_init(rdev->mode_info.atom_context);
8605         }
8606         /* init golden registers */
8607         cik_init_golden_registers(rdev);
8608         /* Initialize scratch registers */
8609         cik_scratch_init(rdev);
8610         /* Initialize surface registers */
8611         radeon_surface_init(rdev);
8612         /* Initialize clocks */
8613         radeon_get_clock_info(rdev->ddev);
8614
8615         /* Fence driver */
8616         r = radeon_fence_driver_init(rdev);
8617         if (r)
8618                 return r;
8619
8620         /* initialize memory controller */
8621         r = cik_mc_init(rdev);
8622         if (r)
8623                 return r;
8624         /* Memory manager */
8625         r = radeon_bo_init(rdev);
8626         if (r)
8627                 return r;
8628
8629         if (rdev->flags & RADEON_IS_IGP) {
8630                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8631                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8632                         r = cik_init_microcode(rdev);
8633                         if (r) {
8634                                 DRM_ERROR("Failed to load firmware!\n");
8635                                 return r;
8636                         }
8637                 }
8638         } else {
8639                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8640                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8641                     !rdev->mc_fw) {
8642                         r = cik_init_microcode(rdev);
8643                         if (r) {
8644                                 DRM_ERROR("Failed to load firmware!\n");
8645                                 return r;
8646                         }
8647                 }
8648         }
8649
8650         /* Initialize power management */
8651         radeon_pm_init(rdev);
8652
8653         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8654         ring->ring_obj = NULL;
8655         r600_ring_init(rdev, ring, 1024 * 1024);
8656
8657         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8658         ring->ring_obj = NULL;
8659         r600_ring_init(rdev, ring, 1024 * 1024);
8660         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8661         if (r)
8662                 return r;
8663
8664         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8665         ring->ring_obj = NULL;
8666         r600_ring_init(rdev, ring, 1024 * 1024);
8667         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8668         if (r)
8669                 return r;
8670
8671         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8672         ring->ring_obj = NULL;
8673         r600_ring_init(rdev, ring, 256 * 1024);
8674
8675         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8676         ring->ring_obj = NULL;
8677         r600_ring_init(rdev, ring, 256 * 1024);
8678
8679         r = radeon_uvd_init(rdev);
8680         if (!r) {
8681                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8682                 ring->ring_obj = NULL;
8683                 r600_ring_init(rdev, ring, 4096);
8684         }
8685
8686         r = radeon_vce_init(rdev);
8687         if (!r) {
8688                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8689                 ring->ring_obj = NULL;
8690                 r600_ring_init(rdev, ring, 4096);
8691
8692                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8693                 ring->ring_obj = NULL;
8694                 r600_ring_init(rdev, ring, 4096);
8695         }
8696
8697         rdev->ih.ring_obj = NULL;
8698         r600_ih_ring_init(rdev, 64 * 1024);
8699
8700         r = r600_pcie_gart_init(rdev);
8701         if (r)
8702                 return r;
8703
8704         rdev->accel_working = true;
8705         r = cik_startup(rdev);
8706         if (r) {
8707                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8708                 cik_cp_fini(rdev);
8709                 cik_sdma_fini(rdev);
8710                 cik_irq_fini(rdev);
8711                 sumo_rlc_fini(rdev);
8712                 cik_mec_fini(rdev);
8713                 radeon_wb_fini(rdev);
8714                 radeon_ib_pool_fini(rdev);
8715                 radeon_vm_manager_fini(rdev);
8716                 radeon_irq_kms_fini(rdev);
8717                 cik_pcie_gart_fini(rdev);
8718                 rdev->accel_working = false;
8719         }
8720
8721         /* Don't start up if the MC ucode is missing.
8722          * The default clocks and voltages before the MC ucode
8723          * is loaded are not sufficient for advanced operations.
8724          */
8725         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8726                 DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8727                 return -EINVAL;
8728         }
8729
8730         return 0;
8731 }
8732
8733 /**
8734  * cik_fini - asic specific driver and hw fini
8735  *
8736  * @rdev: radeon_device pointer
8737  *
8738  * Tear down the asic specific driver variables and program the hw
8739  * to an idle state (CIK).
8740  * Called at driver unload.
8741  */
8742 void cik_fini(struct radeon_device *rdev)
8743 {
8744         radeon_pm_fini(rdev);
8745         cik_cp_fini(rdev);
8746         cik_sdma_fini(rdev);
8747         cik_fini_pg(rdev);
8748         cik_fini_cg(rdev);
8749         cik_irq_fini(rdev);
8750         sumo_rlc_fini(rdev);
8751         cik_mec_fini(rdev);
8752         radeon_wb_fini(rdev);
8753         radeon_vm_manager_fini(rdev);
8754         radeon_ib_pool_fini(rdev);
8755         radeon_irq_kms_fini(rdev);
8756         uvd_v1_0_fini(rdev);
8757         radeon_uvd_fini(rdev);
8758         radeon_vce_fini(rdev);
8759         cik_pcie_gart_fini(rdev);
8760         r600_vram_scratch_fini(rdev);
8761         radeon_gem_fini(rdev);
8762         radeon_fence_driver_fini(rdev);
8763         radeon_bo_fini(rdev);
8764         radeon_atombios_fini(rdev);
8765         kfree(rdev->bios);
8766         rdev->bios = NULL;
8767 }
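     /* A rough orientation note (editorial, not from the original source):
      * teardown here runs in approximately the reverse order of init and
      * startup - rings and IP blocks first, then the shared radeon helpers,
      * then GART, VRAM scratch, BOs and finally the cached BIOS copy.
      */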
8768
8769 void dce8_program_fmt(struct drm_encoder *encoder)
8770 {
8771         struct drm_device *dev = encoder->dev;
8772         struct radeon_device *rdev = dev->dev_private;
8773         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8774         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8775         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8776         int bpc = 0;
8777         u32 tmp = 0;
8778         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8779
8780         if (connector) {
8781                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8782                 bpc = radeon_get_monitor_bpc(connector);
8783                 dither = radeon_connector->dither;
8784         }
8785
8786         /* LVDS/eDP FMT is set up by atom */
8787         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8788                 return;
8789
8790         /* not needed for analog */
8791         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8792             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8793                 return;
8794
8795         if (bpc == 0)
8796                 return;
8797
8798         switch (bpc) {
8799         case 6:
8800                 if (dither == RADEON_FMT_DITHER_ENABLE)
8801                         /* XXX sort out optimal dither settings */
8802                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8803                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8804                 else
8805                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8806                 break;
8807         case 8:
8808                 if (dither == RADEON_FMT_DITHER_ENABLE)
8809                         /* XXX sort out optimal dither settings */
8810                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8811                                 FMT_RGB_RANDOM_ENABLE |
8812                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8813                 else
8814                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8815                 break;
8816         case 10:
8817                 if (dither == RADEON_FMT_DITHER_ENABLE)
8818                         /* XXX sort out optimal dither settings */
8819                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8820                                 FMT_RGB_RANDOM_ENABLE |
8821                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8822                 else
8823                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8824                 break;
8825         default:
8826                 /* not needed */
8827                 break;
8828         }
8829
8830         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8831 }
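     /* Illustrative outcomes of the switch above (derived from the code, not
      * measured): a 6 bpc panel with dithering enabled gets spatial dithering
      * at depth 0, an 8 bpc panel without dithering is simply truncated to
      * depth 1, and any bpc outside 6/8/10 leaves FMT_BIT_DEPTH_CONTROL at 0.
      */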
8832
8833 /* display watermark setup */
8834 /**
8835  * dce8_line_buffer_adjust - Set up the line buffer
8836  *
8837  * @rdev: radeon_device pointer
8838  * @radeon_crtc: the selected display controller
8839  * @mode: the current display mode on the selected display
8840  * controller
8841  *
8842  * Set up the line buffer allocation for
8843  * the selected display controller (CIK).
8844  * Returns the line buffer size in pixels.
8845  */
8846 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8847                                    struct radeon_crtc *radeon_crtc,
8848                                    struct drm_display_mode *mode)
8849 {
8850         u32 tmp, buffer_alloc, i;
8851         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8852         /*
8853          * Line Buffer Setup
8854          * There are 6 line buffers, one for each display controller.
8855          * There are 3 partitions per LB. Select the number of partitions
8856          * to enable based on the display width.  For display widths larger
8857          * than 4096, you need to use 2 display controllers and combine
8858          * them using the stereo blender.
8859          */
8860         if (radeon_crtc->base.enabled && mode) {
8861                 if (mode->crtc_hdisplay < 1920) {
8862                         tmp = 1;
8863                         buffer_alloc = 2;
8864                 } else if (mode->crtc_hdisplay < 2560) {
8865                         tmp = 2;
8866                         buffer_alloc = 2;
8867                 } else if (mode->crtc_hdisplay < 4096) {
8868                         tmp = 0;
8869                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8870                 } else {
8871                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8872                         tmp = 0;
8873                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8874                 }
8875         } else {
8876                 tmp = 1;
8877                 buffer_alloc = 0;
8878         }
8879
8880         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8881                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8882
8883         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8884                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8885         for (i = 0; i < rdev->usec_timeout; i++) {
8886                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8887                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8888                         break;
8889                 udelay(1);
8890         }
8891
8892         if (radeon_crtc->base.enabled && mode) {
8893                 switch (tmp) {
8894                 case 0:
8895                 default:
8896                         return 4096 * 2;
8897                 case 1:
8898                         return 1920 * 2;
8899                 case 2:
8900                         return 2560 * 2;
8901                 }
8902         }
8903
8904         /* controller not enabled, so no lb used */
8905         return 0;
8906 }
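     /* Worked example (values implied by the code above, not measured): a
      * 1920 pixel wide mode falls into the "< 2560" bucket, so tmp = 2 and
      * buffer_alloc = 2, and the function reports 2560 * 2 pixels of line
      * buffer to the watermark code below.
      */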
8907
8908 /**
8909  * cik_get_number_of_dram_channels - get the number of dram channels
8910  *
8911  * @rdev: radeon_device pointer
8912  *
8913  * Look up the number of video ram channels (CIK).
8914  * Used for display watermark bandwidth calculations
8915  * Returns the number of dram channels
8916  */
8917 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8918 {
8919         u32 tmp = RREG32(MC_SHARED_CHMAP);
8920
8921         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8922         case 0:
8923         default:
8924                 return 1;
8925         case 1:
8926                 return 2;
8927         case 2:
8928                 return 4;
8929         case 3:
8930                 return 8;
8931         case 4:
8932                 return 3;
8933         case 5:
8934                 return 6;
8935         case 6:
8936                 return 10;
8937         case 7:
8938                 return 12;
8939         case 8:
8940                 return 16;
8941         }
8942 }
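     /* The NOOFCHAN field is an encoded channel count rather than a raw
      * number: a field value of 3 means 8 channels, 8 means 16 channels, and
      * so on.  As a rough illustration (an assumption, not read from
      * hardware), a 512-bit board would report 16 channels of 32 bits each.
      */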
8943
8944 struct dce8_wm_params {
8945         u32 dram_channels; /* number of dram channels */
8946         u32 yclk;          /* bandwidth per dram data pin in kHz */
8947         u32 sclk;          /* engine clock in kHz */
8948         u32 disp_clk;      /* display clock in kHz */
8949         u32 src_width;     /* viewport width */
8950         u32 active_time;   /* active display time in ns */
8951         u32 blank_time;    /* blank time in ns */
8952         bool interlaced;    /* mode is interlaced */
8953         fixed20_12 vsc;    /* vertical scale ratio */
8954         u32 num_heads;     /* number of active crtcs */
8955         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8956         u32 lb_size;       /* line buffer allocated to pipe */
8957         u32 vtaps;         /* vertical scaler taps */
8958 };
8959
8960 /**
8961  * dce8_dram_bandwidth - get the dram bandwidth
8962  *
8963  * @wm: watermark calculation data
8964  *
8965  * Calculate the raw dram bandwidth (CIK).
8966  * Used for display watermark bandwidth calculations
8967  * Returns the dram bandwidth in MBytes/s
8968  */
8969 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8970 {
8971         /* Calculate raw DRAM Bandwidth */
8972         fixed20_12 dram_efficiency; /* 0.7 */
8973         fixed20_12 yclk, dram_channels, bandwidth;
8974         fixed20_12 a;
8975
8976         a.full = dfixed_const(1000);
8977         yclk.full = dfixed_const(wm->yclk);
8978         yclk.full = dfixed_div(yclk, a);
8979         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8980         a.full = dfixed_const(10);
8981         dram_efficiency.full = dfixed_const(7);
8982         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8983         bandwidth.full = dfixed_mul(dram_channels, yclk);
8984         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8985
8986         return dfixed_trunc(bandwidth);
8987 }
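     /* Worked example with illustrative numbers: yclk = 1000000 kHz (1 GHz
      * effective per pin) and 2 channels gives
      *   1000 MHz * (2 channels * 4 bytes) * 0.7 efficiency = 5600 MBytes/s.
      */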
8988
8989 /**
8990  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8991  *
8992  * @wm: watermark calculation data
8993  *
8994  * Calculate the dram bandwidth used for display (CIK).
8995  * Used for display watermark bandwidth calculations
8996  * Returns the dram bandwidth for display in MBytes/s
8997  */
8998 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8999 {
9000         /* Calculate DRAM Bandwidth and the part allocated to display. */
9001         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9002         fixed20_12 yclk, dram_channels, bandwidth;
9003         fixed20_12 a;
9004
9005         a.full = dfixed_const(1000);
9006         yclk.full = dfixed_const(wm->yclk);
9007         yclk.full = dfixed_div(yclk, a);
9008         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9009         a.full = dfixed_const(10);
9010         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9011         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9012         bandwidth.full = dfixed_mul(dram_channels, yclk);
9013         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9014
9015         return dfixed_trunc(bandwidth);
9016 }
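     /* Same math as dce8_dram_bandwidth() but with the 0.3 worst case display
      * allocation: the illustrative 1 GHz / 2 channel example above would
      * leave 1000 * 8 * 0.3 = 2400 MBytes/s for display.
      */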
9017
9018 /**
9019  * dce8_data_return_bandwidth - get the data return bandwidth
9020  *
9021  * @wm: watermark calculation data
9022  *
9023  * Calculate the data return bandwidth used for display (CIK).
9024  * Used for display watermark bandwidth calculations
9025  * Returns the data return bandwidth in MBytes/s
9026  */
9027 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9028 {
9029         /* Calculate the display Data return Bandwidth */
9030         fixed20_12 return_efficiency; /* 0.8 */
9031         fixed20_12 sclk, bandwidth;
9032         fixed20_12 a;
9033
9034         a.full = dfixed_const(1000);
9035         sclk.full = dfixed_const(wm->sclk);
9036         sclk.full = dfixed_div(sclk, a);
9037         a.full = dfixed_const(10);
9038         return_efficiency.full = dfixed_const(8);
9039         return_efficiency.full = dfixed_div(return_efficiency, a);
9040         a.full = dfixed_const(32);
9041         bandwidth.full = dfixed_mul(a, sclk);
9042         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9043
9044         return dfixed_trunc(bandwidth);
9045 }
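     /* Illustrative example: sclk = 800000 kHz (800 MHz) returns
      * 800 MHz * 32 bytes/clk * 0.8 efficiency = 20480 MBytes/s.
      */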
9046
9047 /**
9048  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9049  *
9050  * @wm: watermark calculation data
9051  *
9052  * Calculate the dmif bandwidth used for display (CIK).
9053  * Used for display watermark bandwidth calculations
9054  * Returns the dmif bandwidth in MBytes/s
9055  */
9056 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9057 {
9058         /* Calculate the DMIF Request Bandwidth */
9059         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9060         fixed20_12 disp_clk, bandwidth;
9061         fixed20_12 a, b;
9062
9063         a.full = dfixed_const(1000);
9064         disp_clk.full = dfixed_const(wm->disp_clk);
9065         disp_clk.full = dfixed_div(disp_clk, a);
9066         a.full = dfixed_const(32);
9067         b.full = dfixed_mul(a, disp_clk);
9068
9069         a.full = dfixed_const(10);
9070         disp_clk_request_efficiency.full = dfixed_const(8);
9071         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9072
9073         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9074
9075         return dfixed_trunc(bandwidth);
9076 }
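     /* Illustrative example: disp_clk = 148500 kHz (a 1080p pixel clock)
      * gives 148.5 MHz * 32 bytes/clk * 0.8 efficiency ~= 3800 MBytes/s.
      */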
9077
9078 /**
9079  * dce8_available_bandwidth - get the min available bandwidth
9080  *
9081  * @wm: watermark calculation data
9082  *
9083  * Calculate the min available bandwidth used for display (CIK).
9084  * Used for display watermark bandwidth calculations
9085  * Returns the min available bandwidth in MBytes/s
9086  */
9087 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9088 {
9089         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9090         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9091         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9092         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9093
9094         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9095 }
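     /* The smallest of the three raw bandwidths acts as the short-term
      * ceiling; the average-bandwidth checks further down compare against
      * this figure (and against the dram figure) on a per-head basis.
      */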
9096
9097 /**
9098  * dce8_average_bandwidth - get the average available bandwidth
9099  *
9100  * @wm: watermark calculation data
9101  *
9102  * Calculate the average available bandwidth used for display (CIK).
9103  * Used for display watermark bandwidth calculations
9104  * Returns the average available bandwidth in MBytes/s
9105  */
9106 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9107 {
9108         /* Calculate the display mode Average Bandwidth
9109          * DisplayMode should contain the source and destination dimensions,
9110          * timing, etc.
9111          */
9112         fixed20_12 bpp;
9113         fixed20_12 line_time;
9114         fixed20_12 src_width;
9115         fixed20_12 bandwidth;
9116         fixed20_12 a;
9117
9118         a.full = dfixed_const(1000);
9119         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9120         line_time.full = dfixed_div(line_time, a);
9121         bpp.full = dfixed_const(wm->bytes_per_pixel);
9122         src_width.full = dfixed_const(wm->src_width);
9123         bandwidth.full = dfixed_mul(src_width, bpp);
9124         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9125         bandwidth.full = dfixed_div(bandwidth, line_time);
9126
9127         return dfixed_trunc(bandwidth);
9128 }
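     /* Rough example (illustrative 1080p-class numbers): 1920 pixels * 4
      * bytes with vsc = 1 is 7680 bytes per line; with a line time around
      * 15 us that works out to roughly 500 MBytes/s of sustained fetch for
      * this head.
      */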
9129
9130 /**
9131  * dce8_latency_watermark - get the latency watermark
9132  *
9133  * @wm: watermark calculation data
9134  *
9135  * Calculate the latency watermark (CIK).
9136  * Used for display watermark bandwidth calculations
9137  * Returns the latency watermark in ns
9138  */
9139 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9140 {
9141         /* First calculate the latency in ns */
9142         u32 mc_latency = 2000; /* 2000 ns. */
9143         u32 available_bandwidth = dce8_available_bandwidth(wm);
9144         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9145         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9146         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9147         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9148                 (wm->num_heads * cursor_line_pair_return_time);
9149         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9150         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9151         u32 tmp, dmif_size = 12288;
9152         fixed20_12 a, b, c;
9153
9154         if (wm->num_heads == 0)
9155                 return 0;
9156
9157         a.full = dfixed_const(2);
9158         b.full = dfixed_const(1);
9159         if ((wm->vsc.full > a.full) ||
9160             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9161             (wm->vtaps >= 5) ||
9162             ((wm->vsc.full >= a.full) && wm->interlaced))
9163                 max_src_lines_per_dst_line = 4;
9164         else
9165                 max_src_lines_per_dst_line = 2;
9166
9167         a.full = dfixed_const(available_bandwidth);
9168         b.full = dfixed_const(wm->num_heads);
9169         a.full = dfixed_div(a, b);
9170
9171         b.full = dfixed_const(mc_latency + 512);
9172         c.full = dfixed_const(wm->disp_clk);
9173         b.full = dfixed_div(b, c);
9174
9175         c.full = dfixed_const(dmif_size);
9176         b.full = dfixed_div(c, b);
9177
9178         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9179
9180         b.full = dfixed_const(1000);
9181         c.full = dfixed_const(wm->disp_clk);
9182         b.full = dfixed_div(c, b);
9183         c.full = dfixed_const(wm->bytes_per_pixel);
9184         b.full = dfixed_mul(b, c);
9185
9186         lb_fill_bw = min(tmp, dfixed_trunc(b));
9187
9188         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9189         b.full = dfixed_const(1000);
9190         c.full = dfixed_const(lb_fill_bw);
9191         b.full = dfixed_div(c, b);
9192         a.full = dfixed_div(a, b);
9193         line_fill_time = dfixed_trunc(a);
9194
9195         if (line_fill_time < wm->active_time)
9196                 return latency;
9197         else
9198                 return latency + (line_fill_time - wm->active_time);
9199
9200 }
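     /* The result is in ns: a fixed 2000 ns MC latency, plus a dc pipe term
      * of 40000000 / disp_clk, plus the time the other heads can occupy the
      * return path, plus any shortfall when the line buffer cannot be
      * refilled within one active line.
      */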
9201
9202 /**
9203  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9204  * average and available dram bandwidth
9205  *
9206  * @wm: watermark calculation data
9207  *
9208  * Check if the display average bandwidth fits in the display
9209  * dram bandwidth (CIK).
9210  * Used for display watermark bandwidth calculations
9211  * Returns true if the display fits, false if not.
9212  */
9213 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9214 {
9215         if (dce8_average_bandwidth(wm) <=
9216             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9217                 return true;
9218         else
9219                 return false;
9220 }
9221
9222 /**
9223  * dce8_average_bandwidth_vs_available_bandwidth - check
9224  * average and available bandwidth
9225  *
9226  * @wm: watermark calculation data
9227  *
9228  * Check if the display average bandwidth fits in the display
9229  * available bandwidth (CIK).
9230  * Used for display watermark bandwidth calculations
9231  * Returns true if the display fits, false if not.
9232  */
9233 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9234 {
9235         if (dce8_average_bandwidth(wm) <=
9236             (dce8_available_bandwidth(wm) / wm->num_heads))
9237                 return true;
9238         else
9239                 return false;
9240 }
9241
9242 /**
9243  * dce8_check_latency_hiding - check latency hiding
9244  *
9245  * @wm: watermark calculation data
9246  *
9247  * Check latency hiding (CIK).
9248  * Used for display watermark bandwidth calculations
9249  * Returns true if the display fits, false if not.
9250  */
9251 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9252 {
9253         u32 lb_partitions = wm->lb_size / wm->src_width;
9254         u32 line_time = wm->active_time + wm->blank_time;
9255         u32 latency_tolerant_lines;
9256         u32 latency_hiding;
9257         fixed20_12 a;
9258
9259         a.full = dfixed_const(1);
9260         if (wm->vsc.full > a.full)
9261                 latency_tolerant_lines = 1;
9262         else {
9263                 if (lb_partitions <= (wm->vtaps + 1))
9264                         latency_tolerant_lines = 1;
9265                 else
9266                         latency_tolerant_lines = 2;
9267         }
9268
9269         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9270
9271         if (dce8_latency_watermark(wm) <= latency_hiding)
9272                 return true;
9273         else
9274                 return false;
9275 }
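     /* In other words: the allocated line buffer (plus blank time) must be
      * able to ride out the latency watermark computed above; if not, the
      * caller notes that display priority should be forced high.
      */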
9276
9277 /**
9278  * dce8_program_watermarks - program display watermarks
9279  *
9280  * @rdev: radeon_device pointer
9281  * @radeon_crtc: the selected display controller
9282  * @lb_size: line buffer size
9283  * @num_heads: number of display controllers in use
9284  *
9285  * Calculate and program the display watermarks for the
9286  * selected display controller (CIK).
9287  */
9288 static void dce8_program_watermarks(struct radeon_device *rdev,
9289                                     struct radeon_crtc *radeon_crtc,
9290                                     u32 lb_size, u32 num_heads)
9291 {
9292         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9293         struct dce8_wm_params wm_low, wm_high;
9294         u32 pixel_period;
9295         u32 line_time = 0;
9296         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9297         u32 tmp, wm_mask;
9298
9299         if (radeon_crtc->base.enabled && num_heads && mode) {
9300                 pixel_period = 1000000 / (u32)mode->clock;
9301                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9302
9303                 /* watermark for high clocks */
9304                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9305                     rdev->pm.dpm_enabled) {
9306                         wm_high.yclk =
9307                                 radeon_dpm_get_mclk(rdev, false) * 10;
9308                         wm_high.sclk =
9309                                 radeon_dpm_get_sclk(rdev, false) * 10;
9310                 } else {
9311                         wm_high.yclk = rdev->pm.current_mclk * 10;
9312                         wm_high.sclk = rdev->pm.current_sclk * 10;
9313                 }
9314
9315                 wm_high.disp_clk = mode->clock;
9316                 wm_high.src_width = mode->crtc_hdisplay;
9317                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9318                 wm_high.blank_time = line_time - wm_high.active_time;
9319                 wm_high.interlaced = false;
9320                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9321                         wm_high.interlaced = true;
9322                 wm_high.vsc = radeon_crtc->vsc;
9323                 wm_high.vtaps = 1;
9324                 if (radeon_crtc->rmx_type != RMX_OFF)
9325                         wm_high.vtaps = 2;
9326                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9327                 wm_high.lb_size = lb_size;
9328                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9329                 wm_high.num_heads = num_heads;
9330
9331                 /* set for high clocks */
9332                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9333
9334                 /* possibly force display priority to high */
9335                 /* should really do this at mode validation time... */
9336                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9337                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9338                     !dce8_check_latency_hiding(&wm_high) ||
9339                     (rdev->disp_priority == 2)) {
9340                         DRM_DEBUG_KMS("force priority to high\n");
9341                 }
9342
9343                 /* watermark for low clocks */
9344                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9345                     rdev->pm.dpm_enabled) {
9346                         wm_low.yclk =
9347                                 radeon_dpm_get_mclk(rdev, true) * 10;
9348                         wm_low.sclk =
9349                                 radeon_dpm_get_sclk(rdev, true) * 10;
9350                 } else {
9351                         wm_low.yclk = rdev->pm.current_mclk * 10;
9352                         wm_low.sclk = rdev->pm.current_sclk * 10;
9353                 }
9354
9355                 wm_low.disp_clk = mode->clock;
9356                 wm_low.src_width = mode->crtc_hdisplay;
9357                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9358                 wm_low.blank_time = line_time - wm_low.active_time;
9359                 wm_low.interlaced = false;
9360                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9361                         wm_low.interlaced = true;
9362                 wm_low.vsc = radeon_crtc->vsc;
9363                 wm_low.vtaps = 1;
9364                 if (radeon_crtc->rmx_type != RMX_OFF)
9365                         wm_low.vtaps = 2;
9366                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9367                 wm_low.lb_size = lb_size;
9368                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9369                 wm_low.num_heads = num_heads;
9370
9371                 /* set for low clocks */
9372                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9373
9374                 /* possibly force display priority to high */
9375                 /* should really do this at mode validation time... */
9376                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9377                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9378                     !dce8_check_latency_hiding(&wm_low) ||
9379                     (rdev->disp_priority == 2)) {
9380                         DRM_DEBUG_KMS("force priority to high\n");
9381                 }
9382         }
9383
9384         /* select wm A */
9385         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9386         tmp = wm_mask;
9387         tmp &= ~LATENCY_WATERMARK_MASK(3);
9388         tmp |= LATENCY_WATERMARK_MASK(1);
9389         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9390         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9391                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9392                 LATENCY_HIGH_WATERMARK(line_time)));
9393         /* select wm B */
9394         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9395         tmp &= ~LATENCY_WATERMARK_MASK(3);
9396         tmp |= LATENCY_WATERMARK_MASK(2);
9397         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9398         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9399                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9400                 LATENCY_HIGH_WATERMARK(line_time)));
9401         /* restore original selection */
9402         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9403
9404         /* save values for DPM */
9405         radeon_crtc->line_time = line_time;
9406         radeon_crtc->wm_high = latency_watermark_a;
9407         radeon_crtc->wm_low = latency_watermark_b;
9408 }
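     /* Note on the register sequence above: DPG_WATERMARK_MASK_CONTROL
      * selects which watermark set the DPG_PIPE_LATENCY_CONTROL write lands
      * in, so set A (high clocks) and set B (low clocks) are programmed back
      * to back and the original selection is then restored.  line_time and
      * both watermarks are kept on the crtc for the DPM code.
      */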
9409
9410 /**
9411  * dce8_bandwidth_update - program display watermarks
9412  *
9413  * @rdev: radeon_device pointer
9414  *
9415  * Calculate and program the display watermarks and line
9416  * buffer allocation (CIK).
9417  */
9418 void dce8_bandwidth_update(struct radeon_device *rdev)
9419 {
9420         struct drm_display_mode *mode = NULL;
9421         u32 num_heads = 0, lb_size;
9422         int i;
9423
9424         radeon_update_display_priority(rdev);
9425
9426         for (i = 0; i < rdev->num_crtc; i++) {
9427                 if (rdev->mode_info.crtcs[i]->base.enabled)
9428                         num_heads++;
9429         }
9430         for (i = 0; i < rdev->num_crtc; i++) {
9431                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9432                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9433                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9434         }
9435 }
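     /* Every enabled crtc counts as one head; the second loop then resizes
      * the line buffer and reprograms both watermark sets for every crtc,
      * with disabled ones simply getting an lb_size of 0.
      */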
9436
9437 /**
9438  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9439  *
9440  * @rdev: radeon_device pointer
9441  *
9442  * Fetches a GPU clock counter snapshot (CIK).
9443  * Returns the 64 bit clock counter snapshot.
9444  */
9445 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9446 {
9447         uint64_t clock;
9448
9449         mutex_lock(&rdev->gpu_clock_mutex);
9450         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9451         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9452                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9453         mutex_unlock(&rdev->gpu_clock_mutex);
9454         return clock;
9455 }
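     /* Writing 1 to RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running
      * counter so the subsequent LSB/MSB reads belong to the same sample;
      * gpu_clock_mutex keeps concurrent callers from interleaving the
      * capture and the two reads.
      */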
9456
9457 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9458                               u32 cntl_reg, u32 status_reg)
9459 {
9460         int r, i;
9461         struct atom_clock_dividers dividers;
9462         uint32_t tmp;
9463
9464         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9465                                            clock, false, &dividers);
9466         if (r)
9467                 return r;
9468
9469         tmp = RREG32_SMC(cntl_reg);
9470         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9471         tmp |= dividers.post_divider;
9472         WREG32_SMC(cntl_reg, tmp);
9473
9474         for (i = 0; i < 100; i++) {
9475                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9476                         break;
9477                 mdelay(10);
9478         }
9479         if (i == 100)
9480                 return -ETIMEDOUT;
9481
9482         return 0;
9483 }
9484
9485 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9486 {
9487         int r = 0;
9488
9489         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9490         if (r)
9491                 return r;
9492
9493         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9494         return r;
9495 }
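     /* Hypothetical caller sketch (values are illustrative only; clocks are
      * passed in the 10 kHz units used for atom clock setup):
      *
      *     r = cik_set_uvd_clocks(rdev, 53300, 40000);
      *     if (r)
      *             DRM_ERROR("failed to raise UVD clocks: %d\n", r);
      *
      * Each cik_set_uvd_clock() call polls its status register for up to
      * 100 * 10 ms before giving up with -ETIMEDOUT.
      */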
9496
9497 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9498 {
9499         int r, i;
9500         struct atom_clock_dividers dividers;
9501         u32 tmp;
9502
9503         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9504                                            ecclk, false, &dividers);
9505         if (r)
9506                 return r;
9507
9508         for (i = 0; i < 100; i++) {
9509                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9510                         break;
9511                 mdelay(10);
9512         }
9513         if (i == 100)
9514                 return -ETIMEDOUT;
9515
9516         tmp = RREG32_SMC(CG_ECLK_CNTL);
9517         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9518         tmp |= dividers.post_divider;
9519         WREG32_SMC(CG_ECLK_CNTL, tmp);
9520
9521         for (i = 0; i < 100; i++) {
9522                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9523                         break;
9524                 mdelay(10);
9525         }
9526         if (i == 100)
9527                 return -ETIMEDOUT;
9528
9529         return 0;
9530 }
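     /* Unlike the UVD path above, the ECLK divider is only written once the
      * initial CG_ECLK_STATUS poll confirms the clock is stable, and the
      * same 100 * 10 ms poll is repeated afterwards before reporting
      * success.
      */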
9531
9532 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9533 {
9534         struct pci_dev *root = rdev->pdev->bus->self;
9535         int bridge_pos, gpu_pos;
9536         u32 speed_cntl, mask, current_data_rate;
9537         int ret, i;
9538         u16 tmp16;
9539
9540         if (pci_is_root_bus(rdev->pdev->bus))
9541                 return;
9542
9543         if (radeon_pcie_gen2 == 0)
9544                 return;
9545
9546         if (rdev->flags & RADEON_IS_IGP)
9547                 return;
9548
9549         if (!(rdev->flags & RADEON_IS_PCIE))
9550                 return;
9551
9552         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9553         if (ret != 0)
9554                 return;
9555
9556         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9557                 return;
9558
9559         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9560         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9561                 LC_CURRENT_DATA_RATE_SHIFT;
9562         if (mask & DRM_PCIE_SPEED_80) {
9563                 if (current_data_rate == 2) {
9564                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9565                         return;
9566                 }
9567                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9568         } else if (mask & DRM_PCIE_SPEED_50) {
9569                 if (current_data_rate == 1) {
9570                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9571                         return;
9572                 }
9573                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9574         }
9575
9576         bridge_pos = pci_pcie_cap(root);
9577         if (!bridge_pos)
9578                 return;
9579
9580         gpu_pos = pci_pcie_cap(rdev->pdev);
9581         if (!gpu_pos)
9582                 return;
9583
9584         if (mask & DRM_PCIE_SPEED_80) {
9585                 /* re-try equalization if gen3 is not already enabled */
9586                 if (current_data_rate != 2) {
9587                         u16 bridge_cfg, gpu_cfg;
9588                         u16 bridge_cfg2, gpu_cfg2;
9589                         u32 max_lw, current_lw, tmp;
9590
9591                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9592                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9593
9594                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9595                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9596
9597                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9598                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9599
9600                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9601                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9602                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9603
9604                         if (current_lw < max_lw) {
9605                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9606                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9607                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9608                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9609                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9610                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9611                                 }
9612                         }
9613
9614                         for (i = 0; i < 10; i++) {
9615                                 /* check status */
9616                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9617                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9618                                         break;
9619
9620                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9621                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9622
9623                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9624                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9625
9626                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9627                                 tmp |= LC_SET_QUIESCE;
9628                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9629
9630                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9631                                 tmp |= LC_REDO_EQ;
9632                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9633
9634                                 mdelay(100);
9635
9636                                 /* linkctl */
9637                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9638                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9639                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9640                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9641
9642                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9643                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9644                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9645                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9646
9647                                 /* linkctl2 */
9648                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9649                                 tmp16 &= ~((1 << 4) | (7 << 9));
9650                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9651                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9652
9653                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9654                                 tmp16 &= ~((1 << 4) | (7 << 9));
9655                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9656                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9657
9658                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9659                                 tmp &= ~LC_SET_QUIESCE;
9660                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9661                         }
9662                 }
9663         }
9664
9665         /* set the link speed */
9666         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9667         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9668         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9669
9670         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9671         tmp16 &= ~0xf;
9672         if (mask & DRM_PCIE_SPEED_80)
9673                 tmp16 |= 3; /* gen3 */
9674         else if (mask & DRM_PCIE_SPEED_50)
9675                 tmp16 |= 2; /* gen2 */
9676         else
9677                 tmp16 |= 1; /* gen1 */
9678         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9679
9680         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9681         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9682         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9683
9684         for (i = 0; i < rdev->usec_timeout; i++) {
9685                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9686                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9687                         break;
9688                 udelay(1);
9689         }
9690 }
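     /* The target link speed written to PCI_EXP_LNKCTL2 above uses the
      * standard PCIe encoding: 1 = 2.5 GT/s (gen1), 2 = 5 GT/s (gen2),
      * 3 = 8 GT/s (gen3); the final loop simply waits for the
      * LC_INITIATE_LINK_SPEED_CHANGE bit to clear again.
      */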
9691
9692 static void cik_program_aspm(struct radeon_device *rdev)
9693 {
9694         u32 data, orig;
9695         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9696         bool disable_clkreq = false;
9697
9698         if (radeon_aspm == 0)
9699                 return;
9700
9701         /* XXX double check IGPs */
9702         if (rdev->flags & RADEON_IS_IGP)
9703                 return;
9704
9705         if (!(rdev->flags & RADEON_IS_PCIE))
9706                 return;
9707
9708         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9709         data &= ~LC_XMIT_N_FTS_MASK;
9710         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9711         if (orig != data)
9712                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9713
9714         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9715         data |= LC_GO_TO_RECOVERY;
9716         if (orig != data)
9717                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9718
9719         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9720         data |= P_IGNORE_EDB_ERR;
9721         if (orig != data)
9722                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9723
9724         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9725         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9726         data |= LC_PMI_TO_L1_DIS;
9727         if (!disable_l0s)
9728                 data |= LC_L0S_INACTIVITY(7);
9729
9730         if (!disable_l1) {
9731                 data |= LC_L1_INACTIVITY(7);
9732                 data &= ~LC_PMI_TO_L1_DIS;
9733                 if (orig != data)
9734                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9735
9736                 if (!disable_plloff_in_l1) {
9737                         bool clk_req_support;
9738
9739                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9740                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9741                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9742                         if (orig != data)
9743                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9744
9745                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9746                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9747                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9748                         if (orig != data)
9749                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9750
9751                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9752                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9753                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9754                         if (orig != data)
9755                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9756
9757                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9758                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9759                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9760                         if (orig != data)
9761                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9762
9763                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9764                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9765                         data |= LC_DYN_LANES_PWR_STATE(3);
9766                         if (orig != data)
9767                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9768
9769                         if (!disable_clkreq &&
9770                             !pci_is_root_bus(rdev->pdev->bus)) {
9771                                 struct pci_dev *root = rdev->pdev->bus->self;
9772                                 u32 lnkcap;
9773
9774                                 clk_req_support = false;
9775                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9776                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9777                                         clk_req_support = true;
9778                         } else {
9779                                 clk_req_support = false;
9780                         }
9781
9782                         if (clk_req_support) {
9783                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9784                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9785                                 if (orig != data)
9786                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9787
9788                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9789                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9790                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9791                                 if (orig != data)
9792                                         WREG32_SMC(THM_CLK_CNTL, data);
9793
9794                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9795                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9796                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9797                                 if (orig != data)
9798                                         WREG32_SMC(MISC_CLK_CTRL, data);
9799
9800                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9801                                 data &= ~BCLK_AS_XCLK;
9802                                 if (orig != data)
9803                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9804
9805                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9806                                 data &= ~FORCE_BIF_REFCLK_EN;
9807                                 if (orig != data)
9808                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9809
9810                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9811                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9812                                 data |= MPLL_CLKOUT_SEL(4);
9813                                 if (orig != data)
9814                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9815                         }
9816                 }
9817         } else {
9818                 if (orig != data)
9819                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9820         }
9821
9822         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9823         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9824         if (orig != data)
9825                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9826
9827         if (!disable_l0s) {
9828                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9829         if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9830                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9831                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9832                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9833                                 data &= ~LC_L0S_INACTIVITY_MASK;
9834                                 if (orig != data)
9835                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9836                         }
9837                 }
9838         }
9839 }
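     /* With the local disable_* flags all false (as above), this path enables
      * the L0s and L1 inactivity timers and, when the upstream bridge
      * advertises PCI_EXP_LNKCAP_CLKPM, also allows the PCIE PLLs to power
      * down in L1 and L2/3.
      */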