/*
 * drivers/gpu/drm/radeon/cik.c (linux-2.6-block.git)
 * From commit: "drm/radeon: always apply pci shutdown callbacks"
 */
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
/*
 * Firmware images required by the CIK ASICs handled in this file.
 * NOTE(review): two naming schemes (UPPERCASE and lowercase) are declared
 * for most parts — presumably the driver tries one and falls back to the
 * other at microcode-load time; confirm against *_init_microcode().
 */

/* Bonaire */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

/* Hawaii */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

/* Kaveri (no MC/SMC images — APU) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

/*
 * Routines implemented in other radeon ASIC files (r600/evergreen/si/sumo/
 * cik_sdma/vce) that this file calls, plus forward declarations for
 * CIK-local static helpers defined later in this file.
 */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

146 /**
147  * cik_get_allowed_info_register - fetch the register for the info ioctl
148  *
149  * @rdev: radeon_device pointer
150  * @reg: register offset in bytes
151  * @val: register value
152  *
153  * Returns 0 for success or -EINVAL for an invalid register
154  *
155  */
156 int cik_get_allowed_info_register(struct radeon_device *rdev,
157                                   u32 reg, u32 *val)
158 {
159         switch (reg) {
160         case GRBM_STATUS:
161         case GRBM_STATUS2:
162         case GRBM_STATUS_SE0:
163         case GRBM_STATUS_SE1:
164         case GRBM_STATUS_SE2:
165         case GRBM_STATUS_SE3:
166         case SRBM_STATUS:
167         case SRBM_STATUS2:
168         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
169         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
170         case UVD_STATUS:
171         /* TODO VCE */
172                 *val = RREG32(reg);
173                 return 0;
174         default:
175                 return -EINVAL;
176         }
177 }
178
/*
 * Indirect registers accessors
 */

/* Read a DIDT indirect register: write the register number to the
 * index register, then read the data register.  didt_idx_lock keeps
 * the index/data pair atomic with respect to concurrent DIDT accesses.
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));	/* select the indirect register */
	r = RREG32(CIK_DIDT_IND_DATA);		/* then fetch its value */
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

/* Write a DIDT indirect register: select it via the index register,
 * then write the value to the data register, under didt_idx_lock so
 * the two-step sequence cannot interleave with another DIDT access.
 */
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));	/* select the indirect register */
	WREG32(CIK_DIDT_IND_DATA, (v));		/* then store the new value */
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}

204 /* get temperature in millidegrees */
205 int ci_get_temp(struct radeon_device *rdev)
206 {
207         u32 temp;
208         int actual_temp = 0;
209
210         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
211                 CTF_TEMP_SHIFT;
212
213         if (temp & 0x200)
214                 actual_temp = 255;
215         else
216                 actual_temp = temp & 0x1ff;
217
218         actual_temp = actual_temp * 1000;
219
220         return actual_temp;
221 }
222
223 /* get temperature in millidegrees */
224 int kv_get_temp(struct radeon_device *rdev)
225 {
226         u32 temp;
227         int actual_temp = 0;
228
229         temp = RREG32_SMC(0xC0300E0C);
230
231         if (temp)
232                 actual_temp = (temp / 8) - 49;
233         else
234                 actual_temp = 0;
235
236         actual_temp = actual_temp * 1000;
237
238         return actual_temp;
239 }
240
/*
 * Indirect registers accessor
 */

/* Read a PCIE port indirect register via the PCIE_INDEX/PCIE_DATA pair,
 * serialized by pciep_idx_lock.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back, presumably to flush the posted index write — confirm vs other asics */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

/* Write a PCIE port indirect register via the PCIE_INDEX/PCIE_DATA pair,
 * serialized by pciep_idx_lock.  Each write is followed by a read back,
 * presumably to flush the posted write before proceeding — confirm vs
 * the equivalent accessors in the other radeon ASIC files.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

/* RLC save/restore register list for the Spectre GFX block.
 * NOTE(review): entries appear to be pairs of
 *   (instance/broadcast select << 16) | (register byte offset >> 2)
 * followed by a save slot initialized to 0; the bare 0x3 / 0x5 words
 * look like section markers or counts consumed by the RLC microcode.
 * Confirm against the RLC save/restore ucode format before relying on
 * this interpretation.  Do not reorder or edit values by hand.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* presumably a section/count marker — see NOTE above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* presumably a section/count marker — see NOTE above */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

716 static const u32 kalindi_rlc_save_restore_register_list[] =
717 {
718         (0x0e00 << 16) | (0xc12c >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc140 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc150 >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc15c >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc168 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc170 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc204 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc2b4 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2b8 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2bc >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc2c0 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0x8228 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x829c >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0x869c >> 2),
745         0x00000000,
746         (0x0600 << 16) | (0x98f4 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x98f8 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0x9900 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0xc260 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x90e8 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x3c000 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x3c00c >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x8c1c >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x9700 >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0xcd20 >> 2),
765         0x00000000,
766         (0x4e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x5e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x6e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x7e00 << 16) | (0xcd20 >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0x89bc >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0x8900 >> 2),
777         0x00000000,
778         0x3,
779         (0x0e00 << 16) | (0xc130 >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc134 >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc1fc >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc208 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc264 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc268 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc26c >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc270 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc274 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc28c >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc290 >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc294 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc298 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc2a0 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a4 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2a8 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0xc2ac >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x301d0 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x30238 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30250 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30254 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x30258 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0x3025c >> 2),
824         0x00000000,
825         (0x4e00 << 16) | (0xc900 >> 2),
826         0x00000000,
827         (0x5e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x6e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x7e00 << 16) | (0xc900 >> 2),
832         0x00000000,
833         (0x4e00 << 16) | (0xc904 >> 2),
834         0x00000000,
835         (0x5e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x6e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x7e00 << 16) | (0xc904 >> 2),
840         0x00000000,
841         (0x4e00 << 16) | (0xc908 >> 2),
842         0x00000000,
843         (0x5e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x6e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x7e00 << 16) | (0xc908 >> 2),
848         0x00000000,
849         (0x4e00 << 16) | (0xc90c >> 2),
850         0x00000000,
851         (0x5e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x6e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x7e00 << 16) | (0xc90c >> 2),
856         0x00000000,
857         (0x4e00 << 16) | (0xc910 >> 2),
858         0x00000000,
859         (0x5e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x6e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x7e00 << 16) | (0xc910 >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0xc99c >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0x9834 >> 2),
868         0x00000000,
869         (0x0000 << 16) | (0x30f00 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f04 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f08 >> 2),
874         0x00000000,
875         (0x0000 << 16) | (0x30f0c >> 2),
876         0x00000000,
877         (0x0600 << 16) | (0x9b7c >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x8a14 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x8a18 >> 2),
882         0x00000000,
883         (0x0600 << 16) | (0x30a00 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x8bf0 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8bcc >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x8b24 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x30a04 >> 2),
892         0x00000000,
893         (0x0600 << 16) | (0x30a10 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a14 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a18 >> 2),
898         0x00000000,
899         (0x0600 << 16) | (0x30a2c >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xc700 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc704 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc708 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0xc768 >> 2),
908         0x00000000,
909         (0x0400 << 16) | (0xc770 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc774 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc798 >> 2),
914         0x00000000,
915         (0x0400 << 16) | (0xc79c >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x9100 >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x3c010 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x8c00 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c04 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c20 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c38 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0x8c3c >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0xae00 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0x9604 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0xac08 >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac0c >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac10 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac14 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac58 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac68 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac6c >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac70 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac74 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac78 >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac7c >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac80 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac84 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac88 >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0xac8c >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x970c >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x9714 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x9718 >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x971c >> 2),
972         0x00000000,
973         (0x0e00 << 16) | (0x31068 >> 2),
974         0x00000000,
975         (0x4e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x5e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x6e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x7e00 << 16) | (0x31068 >> 2),
982         0x00000000,
983         (0x0e00 << 16) | (0xcd10 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0xcd14 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0x88b0 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b4 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88b8 >> 2),
992         0x00000000,
993         (0x0e00 << 16) | (0x88bc >> 2),
994         0x00000000,
995         (0x0400 << 16) | (0x89c0 >> 2),
996         0x00000000,
997         (0x0e00 << 16) | (0x88c4 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88c8 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88d0 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d4 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x88d8 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x8980 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x30938 >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x3093c >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x30940 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x89a0 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x30900 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x30904 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x89b4 >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x3e1fc >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3c210 >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c214 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x3c218 >> 2),
1030         0x00000000,
1031         (0x0e00 << 16) | (0x8904 >> 2),
1032         0x00000000,
1033         0x5,
1034         (0x0e00 << 16) | (0x8c28 >> 2),
1035         (0x0e00 << 16) | (0x8c2c >> 2),
1036         (0x0e00 << 16) | (0x8c30 >> 2),
1037         (0x0e00 << 16) | (0x8c34 >> 2),
1038         (0x0e00 << 16) | (0x9600 >> 2),
1039 };
1040
/* Bonaire SPM "golden" register override.
 * Rows appear to be {register offset, mask, value} u32 triples (3 per line)
 * applied via radeon_program_register_sequence() from
 * cik_init_golden_registers() -- confirm triple semantics against that helper.
 */
static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1045
/* Bonaire common golden registers ({offset, mask, value} triples); the same
 * four offsets (0xc770..0xc79c) recur in the spectre/kalindi common tables.
 */
static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1053
/* Bonaire per-ASIC golden register fixups ({offset, mask, value} triples),
 * programmed once at init by cik_init_golden_registers().
 */
static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};
1098
/* Bonaire MGCG/CGCG (clock gating, per the identifier) init sequence;
 * {offset, mask, value} triples applied by radeon_program_register_sequence().
 * The long 0x3c020..0x3c0a8 run programs consecutive registers with a
 * repeating 5-value pattern -- presumably per-block gating delay tables;
 * verify against the CIK register spec before touching.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1184
/* Spectre (Kaveri) SPM golden register override; same single {offset, mask,
 * value} entry as the other ASICs' *_golden_spm_registers tables.
 */
static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1189
/* Spectre (Kaveri) common golden registers ({offset, mask, value} triples). */
static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1197
/* Spectre (Kaveri) per-ASIC golden register fixups ({offset, mask, value}
 * triples), applied once at init via radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28354, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};
1226
/* Spectre (Kaveri) MGCG/CGCG clock-gating init sequence
 * ({offset, mask, value} triples). Differs from the Bonaire table mainly in
 * the 0x3c2c0..0x3c2c8 values and a longer 0x3c0ac..0x3c0bc tail.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1317
/* Kalindi (Kabini/Mullins) SPM golden register override
 * ({offset, mask, value}); shared by CHIP_KABINI and CHIP_MULLINS in
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1322
/* Kalindi common golden registers ({offset, mask, value} triples); used for
 * both Kabini and Mullins.
 */
static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1330
/* Kalindi (Kabini) per-ASIC golden register fixups
 * ({offset, mask, value} triples).
 */
static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1364
/* Kalindi MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triples); shared by Kabini and Mullins. Shorter 0x3c020..0x3c044 run than
 * the discrete-GPU tables.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1423
/* Hawaii SPM golden register override ({offset, mask, value}). */
static const u32 hawaii_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1428
/* Hawaii common golden registers ({offset, mask, value} triples); note this
 * table uses a different offset set than the other ASICs' common tables.
 */
static const u32 hawaii_golden_common_registers[] =
{
        0x30800, 0xffffffff, 0xe0000000,
        0x28350, 0xffffffff, 0x3a00161a,
        0x28354, 0xffffffff, 0x0000002e,
        0x9a10, 0xffffffff, 0x00018208,
        0x98f8, 0xffffffff, 0x12011003
};
1437
/* Hawaii per-ASIC golden register fixups ({offset, mask, value} triples),
 * applied from the CHIP_HAWAII case of cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x9a10, 0x00010000, 0x00058208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x2120, 0x0000007f, 0x0000001b,
        0x21dc, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000800,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xae00, 0x00100000, 0x000ff07c,
        0xac14, 0x000003ff, 0x0000000f,
        0xac10, 0xffffffff, 0x7564fdec,
        0xac0c, 0xffffffff, 0x3120b9a8,
        0xac08, 0x20000000, 0x0f9c0000
};
1477
/* Hawaii MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triples). Longest of the CIK tables: the 0x3c020..0x3c0f8 run extends the
 * repeating 5-value pattern further than the other ASICs, and there is an
 * extra 0xc318..0x2f50 group near the end not present in the other tables.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffd,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00200100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c0c0, 0xffffffff, 0x00010000,
        0x3c0c4, 0xffffffff, 0x00030002,
        0x3c0c8, 0xffffffff, 0x00040007,
        0x3c0cc, 0xffffffff, 0x00060005,
        0x3c0d0, 0xffffffff, 0x00090008,
        0x3c0d4, 0xffffffff, 0x00010000,
        0x3c0d8, 0xffffffff, 0x00030002,
        0x3c0dc, 0xffffffff, 0x00040007,
        0x3c0e0, 0xffffffff, 0x00060005,
        0x3c0e4, 0xffffffff, 0x00090008,
        0x3c0e8, 0xffffffff, 0x00010000,
        0x3c0ec, 0xffffffff, 0x00030002,
        0x3c0f0, 0xffffffff, 0x00040007,
        0x3c0f4, 0xffffffff, 0x00060005,
        0x3c0f8, 0xffffffff, 0x00090008,
        0xc318, 0xffffffff, 0x00020200,
        0x3350, 0xffffffff, 0x00000200,
        0x15c0, 0xffffffff, 0x00000400,
        0x55e8, 0xffffffff, 0x00000000,
        0x2f50, 0xffffffff, 0x00000902,
        0x3c000, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xc060000c,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1588
/* Godavari (Mullins) per-ASIC golden register fixups
 * ({offset, mask, value} triples), used in the CHIP_MULLINS case of
 * cik_init_golden_registers() alongside the shared kalindi tables.
 */
static const u32 godavari_golden_registers[] =
{
        0x55e4, 0xff607fff, 0xfc000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        /* NOTE(review): 0x98302 looks like a typo for 0x9834 -- every other
         * golden table pairs 0x9834 with this exact mask/value
         * (0xf00fffff/0x00000400). Verify against the CIK register spec
         * before changing; as-is this writes an unexpected offset. */
        0x98302, 0xf00fffff, 0x00000400,
        0x6130, 0xffffffff, 0x00010000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ff0fff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0xd014, 0x00010000, 0x00810001,
        0xd814, 0x00010000, 0x00810001,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000001,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1624
1625
1626 static void cik_init_golden_registers(struct radeon_device *rdev)
1627 {
1628         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1629         mutex_lock(&rdev->grbm_idx_mutex);
1630         switch (rdev->family) {
1631         case CHIP_BONAIRE:
1632                 radeon_program_register_sequence(rdev,
1633                                                  bonaire_mgcg_cgcg_init,
1634                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1635                 radeon_program_register_sequence(rdev,
1636                                                  bonaire_golden_registers,
1637                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1638                 radeon_program_register_sequence(rdev,
1639                                                  bonaire_golden_common_registers,
1640                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1641                 radeon_program_register_sequence(rdev,
1642                                                  bonaire_golden_spm_registers,
1643                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1644                 break;
1645         case CHIP_KABINI:
1646                 radeon_program_register_sequence(rdev,
1647                                                  kalindi_mgcg_cgcg_init,
1648                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1649                 radeon_program_register_sequence(rdev,
1650                                                  kalindi_golden_registers,
1651                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1652                 radeon_program_register_sequence(rdev,
1653                                                  kalindi_golden_common_registers,
1654                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1655                 radeon_program_register_sequence(rdev,
1656                                                  kalindi_golden_spm_registers,
1657                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1658                 break;
1659         case CHIP_MULLINS:
1660                 radeon_program_register_sequence(rdev,
1661                                                  kalindi_mgcg_cgcg_init,
1662                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1663                 radeon_program_register_sequence(rdev,
1664                                                  godavari_golden_registers,
1665                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1666                 radeon_program_register_sequence(rdev,
1667                                                  kalindi_golden_common_registers,
1668                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1669                 radeon_program_register_sequence(rdev,
1670                                                  kalindi_golden_spm_registers,
1671                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1672                 break;
1673         case CHIP_KAVERI:
1674                 radeon_program_register_sequence(rdev,
1675                                                  spectre_mgcg_cgcg_init,
1676                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1677                 radeon_program_register_sequence(rdev,
1678                                                  spectre_golden_registers,
1679                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1680                 radeon_program_register_sequence(rdev,
1681                                                  spectre_golden_common_registers,
1682                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1683                 radeon_program_register_sequence(rdev,
1684                                                  spectre_golden_spm_registers,
1685                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1686                 break;
1687         case CHIP_HAWAII:
1688                 radeon_program_register_sequence(rdev,
1689                                                  hawaii_mgcg_cgcg_init,
1690                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1691                 radeon_program_register_sequence(rdev,
1692                                                  hawaii_golden_registers,
1693                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1694                 radeon_program_register_sequence(rdev,
1695                                                  hawaii_golden_common_registers,
1696                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1697                 radeon_program_register_sequence(rdev,
1698                                                  hawaii_golden_spm_registers,
1699                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1700                 break;
1701         default:
1702                 break;
1703         }
1704         mutex_unlock(&rdev->grbm_idx_mutex);
1705 }
1706
1707 /**
1708  * cik_get_xclk - get the xclk
1709  *
1710  * @rdev: radeon_device pointer
1711  *
1712  * Returns the reference clock used by the gfx engine
1713  * (CIK).
1714  */
1715 u32 cik_get_xclk(struct radeon_device *rdev)
1716 {
1717         u32 reference_clock = rdev->clock.spll.reference_freq;
1718
1719         if (rdev->flags & RADEON_IS_IGP) {
1720                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1721                         return reference_clock / 2;
1722         } else {
1723                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1724                         return reference_clock / 4;
1725         }
1726         return reference_clock;
1727 }
1728
1729 /**
1730  * cik_mm_rdoorbell - read a doorbell dword
1731  *
1732  * @rdev: radeon_device pointer
1733  * @index: doorbell index
1734  *
1735  * Returns the value in the doorbell aperture at the
1736  * requested doorbell index (CIK).
1737  */
1738 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1739 {
1740         if (index < rdev->doorbell.num_doorbells) {
1741                 return readl(rdev->doorbell.ptr + index);
1742         } else {
1743                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1744                 return 0;
1745         }
1746 }
1747
1748 /**
1749  * cik_mm_wdoorbell - write a doorbell dword
1750  *
1751  * @rdev: radeon_device pointer
1752  * @index: doorbell index
1753  * @v: value to write
1754  *
1755  * Writes @v to the doorbell aperture at the
1756  * requested doorbell index (CIK).
1757  */
1758 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1759 {
1760         if (index < rdev->doorbell.num_doorbells) {
1761                 writel(v, rdev->doorbell.ptr + index);
1762         } else {
1763                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1764         }
1765 }
1766
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Bonaire legacy (headerless) MC firmware setup: {index, data} pairs
 * written via MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by
 * ci_mc_load_microcode() before the MC ucode itself is uploaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1808
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * Hawaii legacy (headerless) MC firmware setup: {index, data} pairs
 * written via MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by
 * ci_mc_load_microcode() before the MC ucode itself is uploaded.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1836
1837
1838 /**
1839  * cik_srbm_select - select specific register instances
1840  *
1841  * @rdev: radeon_device pointer
1842  * @me: selected ME (micro engine)
1843  * @pipe: pipe
1844  * @queue: queue
1845  * @vmid: VMID
1846  *
1847  * Switches the currently active registers instances.  Some
1848  * registers are instanced per VMID, others are instanced per
1849  * me/pipe/queue combination.
1850  */
1851 static void cik_srbm_select(struct radeon_device *rdev,
1852                             u32 me, u32 pipe, u32 queue, u32 vmid)
1853 {
1854         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1855                              MEID(me & 0x3) |
1856                              VMID(vmid & 0xf) |
1857                              QUEUEID(queue & 0x7));
1858         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1859 }
1860
1861 /* ucode loading */
1862 /**
1863  * ci_mc_load_microcode - load MC ucode into the hw
1864  *
1865  * @rdev: radeon_device pointer
1866  *
1867  * Load the GDDR MC ucode into the hw (CIK).
1868  * Returns 0 on success, error on failure.
1869  */
1870 int ci_mc_load_microcode(struct radeon_device *rdev)
1871 {
1872         const __be32 *fw_data = NULL;
1873         const __le32 *new_fw_data = NULL;
1874         u32 running, tmp;
1875         u32 *io_mc_regs = NULL;
1876         const __le32 *new_io_mc_regs = NULL;
1877         int i, regs_size, ucode_size;
1878
1879         if (!rdev->mc_fw)
1880                 return -EINVAL;
1881
1882         if (rdev->new_fw) {
1883                 const struct mc_firmware_header_v1_0 *hdr =
1884                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1885
1886                 radeon_ucode_print_mc_hdr(&hdr->header);
1887
1888                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1889                 new_io_mc_regs = (const __le32 *)
1890                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1891                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1892                 new_fw_data = (const __le32 *)
1893                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1894         } else {
1895                 ucode_size = rdev->mc_fw->size / 4;
1896
1897                 switch (rdev->family) {
1898                 case CHIP_BONAIRE:
1899                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1900                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1901                         break;
1902                 case CHIP_HAWAII:
1903                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1904                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1905                         break;
1906                 default:
1907                         return -EINVAL;
1908                 }
1909                 fw_data = (const __be32 *)rdev->mc_fw->data;
1910         }
1911
1912         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1913
1914         if (running == 0) {
1915                 /* reset the engine and set to writable */
1916                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1917                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1918
1919                 /* load mc io regs */
1920                 for (i = 0; i < regs_size; i++) {
1921                         if (rdev->new_fw) {
1922                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1923                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1924                         } else {
1925                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1926                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1927                         }
1928                 }
1929
1930                 tmp = RREG32(MC_SEQ_MISC0);
1931                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1932                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1933                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1934                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1935                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1936                 }
1937
1938                 /* load the MC ucode */
1939                 for (i = 0; i < ucode_size; i++) {
1940                         if (rdev->new_fw)
1941                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1942                         else
1943                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1944                 }
1945
1946                 /* put the engine back into the active state */
1947                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1948                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1949                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1950
1951                 /* wait for training to complete */
1952                 for (i = 0; i < rdev->usec_timeout; i++) {
1953                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1954                                 break;
1955                         udelay(1);
1956                 }
1957                 for (i = 0; i < rdev->usec_timeout; i++) {
1958                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1959                                 break;
1960                         udelay(1);
1961                 }
1962         }
1963
1964         return 0;
1965 }
1966
1967 /**
1968  * cik_init_microcode - load ucode images from disk
1969  *
1970  * @rdev: radeon_device pointer
1971  *
1972  * Use the firmware interface to load the ucode images into
1973  * the driver (not loaded into hw).
1974  * Returns 0 on success, error on failure.
1975  */
1976 static int cik_init_microcode(struct radeon_device *rdev)
1977 {
1978         const char *chip_name;
1979         const char *new_chip_name;
1980         size_t pfp_req_size, me_req_size, ce_req_size,
1981                 mec_req_size, rlc_req_size, mc_req_size = 0,
1982                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1983         char fw_name[30];
1984         int new_fw = 0;
1985         int err;
1986         int num_fw;
1987         bool new_smc = false;
1988
1989         DRM_DEBUG("\n");
1990
1991         switch (rdev->family) {
1992         case CHIP_BONAIRE:
1993                 chip_name = "BONAIRE";
1994                 if ((rdev->pdev->revision == 0x80) ||
1995                     (rdev->pdev->revision == 0x81) ||
1996                     (rdev->pdev->device == 0x665f))
1997                         new_smc = true;
1998                 new_chip_name = "bonaire";
1999                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2000                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2001                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2002                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2003                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2004                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2005                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2006                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2007                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2008                 num_fw = 8;
2009                 break;
2010         case CHIP_HAWAII:
2011                 chip_name = "HAWAII";
2012                 if (rdev->pdev->revision == 0x80)
2013                         new_smc = true;
2014                 new_chip_name = "hawaii";
2015                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2016                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2017                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2018                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2019                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2020                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2021                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2022                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2023                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2024                 num_fw = 8;
2025                 break;
2026         case CHIP_KAVERI:
2027                 chip_name = "KAVERI";
2028                 new_chip_name = "kaveri";
2029                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2030                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2031                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2032                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2033                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2034                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2035                 num_fw = 7;
2036                 break;
2037         case CHIP_KABINI:
2038                 chip_name = "KABINI";
2039                 new_chip_name = "kabini";
2040                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2041                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2042                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2043                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2044                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2045                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2046                 num_fw = 6;
2047                 break;
2048         case CHIP_MULLINS:
2049                 chip_name = "MULLINS";
2050                 new_chip_name = "mullins";
2051                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2052                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2053                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2054                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2055                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2056                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2057                 num_fw = 6;
2058                 break;
2059         default: BUG();
2060         }
2061
2062         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2063
2064         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2065         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2066         if (err) {
2067                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2068                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069                 if (err)
2070                         goto out;
2071                 if (rdev->pfp_fw->size != pfp_req_size) {
2072                         printk(KERN_ERR
2073                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074                                rdev->pfp_fw->size, fw_name);
2075                         err = -EINVAL;
2076                         goto out;
2077                 }
2078         } else {
2079                 err = radeon_ucode_validate(rdev->pfp_fw);
2080                 if (err) {
2081                         printk(KERN_ERR
2082                                "cik_fw: validation failed for firmware \"%s\"\n",
2083                                fw_name);
2084                         goto out;
2085                 } else {
2086                         new_fw++;
2087                 }
2088         }
2089
2090         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2091         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092         if (err) {
2093                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2094                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2095                 if (err)
2096                         goto out;
2097                 if (rdev->me_fw->size != me_req_size) {
2098                         printk(KERN_ERR
2099                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100                                rdev->me_fw->size, fw_name);
2101                         err = -EINVAL;
2102                 }
2103         } else {
2104                 err = radeon_ucode_validate(rdev->me_fw);
2105                 if (err) {
2106                         printk(KERN_ERR
2107                                "cik_fw: validation failed for firmware \"%s\"\n",
2108                                fw_name);
2109                         goto out;
2110                 } else {
2111                         new_fw++;
2112                 }
2113         }
2114
2115         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2116         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2117         if (err) {
2118                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2119                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2120                 if (err)
2121                         goto out;
2122                 if (rdev->ce_fw->size != ce_req_size) {
2123                         printk(KERN_ERR
2124                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2125                                rdev->ce_fw->size, fw_name);
2126                         err = -EINVAL;
2127                 }
2128         } else {
2129                 err = radeon_ucode_validate(rdev->ce_fw);
2130                 if (err) {
2131                         printk(KERN_ERR
2132                                "cik_fw: validation failed for firmware \"%s\"\n",
2133                                fw_name);
2134                         goto out;
2135                 } else {
2136                         new_fw++;
2137                 }
2138         }
2139
2140         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2141         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142         if (err) {
2143                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2144                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2145                 if (err)
2146                         goto out;
2147                 if (rdev->mec_fw->size != mec_req_size) {
2148                         printk(KERN_ERR
2149                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2150                                rdev->mec_fw->size, fw_name);
2151                         err = -EINVAL;
2152                 }
2153         } else {
2154                 err = radeon_ucode_validate(rdev->mec_fw);
2155                 if (err) {
2156                         printk(KERN_ERR
2157                                "cik_fw: validation failed for firmware \"%s\"\n",
2158                                fw_name);
2159                         goto out;
2160                 } else {
2161                         new_fw++;
2162                 }
2163         }
2164
2165         if (rdev->family == CHIP_KAVERI) {
2166                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2167                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2168                 if (err) {
2169                         goto out;
2170                 } else {
2171                         err = radeon_ucode_validate(rdev->mec2_fw);
2172                         if (err) {
2173                                 goto out;
2174                         } else {
2175                                 new_fw++;
2176                         }
2177                 }
2178         }
2179
2180         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2181         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2182         if (err) {
2183                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2184                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2185                 if (err)
2186                         goto out;
2187                 if (rdev->rlc_fw->size != rlc_req_size) {
2188                         printk(KERN_ERR
2189                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2190                                rdev->rlc_fw->size, fw_name);
2191                         err = -EINVAL;
2192                 }
2193         } else {
2194                 err = radeon_ucode_validate(rdev->rlc_fw);
2195                 if (err) {
2196                         printk(KERN_ERR
2197                                "cik_fw: validation failed for firmware \"%s\"\n",
2198                                fw_name);
2199                         goto out;
2200                 } else {
2201                         new_fw++;
2202                 }
2203         }
2204
2205         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2206         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2207         if (err) {
2208                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2209                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2210                 if (err)
2211                         goto out;
2212                 if (rdev->sdma_fw->size != sdma_req_size) {
2213                         printk(KERN_ERR
2214                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2215                                rdev->sdma_fw->size, fw_name);
2216                         err = -EINVAL;
2217                 }
2218         } else {
2219                 err = radeon_ucode_validate(rdev->sdma_fw);
2220                 if (err) {
2221                         printk(KERN_ERR
2222                                "cik_fw: validation failed for firmware \"%s\"\n",
2223                                fw_name);
2224                         goto out;
2225                 } else {
2226                         new_fw++;
2227                 }
2228         }
2229
2230         /* No SMC, MC ucode on APUs */
2231         if (!(rdev->flags & RADEON_IS_IGP)) {
2232                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2233                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2234                 if (err) {
2235                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2236                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2237                         if (err) {
2238                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2239                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2240                                 if (err)
2241                                         goto out;
2242                         }
2243                         if ((rdev->mc_fw->size != mc_req_size) &&
2244                             (rdev->mc_fw->size != mc2_req_size)){
2245                                 printk(KERN_ERR
2246                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2247                                        rdev->mc_fw->size, fw_name);
2248                                 err = -EINVAL;
2249                         }
2250                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2251                 } else {
2252                         err = radeon_ucode_validate(rdev->mc_fw);
2253                         if (err) {
2254                                 printk(KERN_ERR
2255                                        "cik_fw: validation failed for firmware \"%s\"\n",
2256                                        fw_name);
2257                                 goto out;
2258                         } else {
2259                                 new_fw++;
2260                         }
2261                 }
2262
2263                 if (new_smc)
2264                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2265                 else
2266                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2267                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2268                 if (err) {
2269                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2270                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2271                         if (err) {
2272                                 printk(KERN_ERR
2273                                        "smc: error loading firmware \"%s\"\n",
2274                                        fw_name);
2275                                 release_firmware(rdev->smc_fw);
2276                                 rdev->smc_fw = NULL;
2277                                 err = 0;
2278                         } else if (rdev->smc_fw->size != smc_req_size) {
2279                                 printk(KERN_ERR
2280                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2281                                        rdev->smc_fw->size, fw_name);
2282                                 err = -EINVAL;
2283                         }
2284                 } else {
2285                         err = radeon_ucode_validate(rdev->smc_fw);
2286                         if (err) {
2287                                 printk(KERN_ERR
2288                                        "cik_fw: validation failed for firmware \"%s\"\n",
2289                                        fw_name);
2290                                 goto out;
2291                         } else {
2292                                 new_fw++;
2293                         }
2294                 }
2295         }
2296
2297         if (new_fw == 0) {
2298                 rdev->new_fw = false;
2299         } else if (new_fw < num_fw) {
2300                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2301                 err = -EINVAL;
2302         } else {
2303                 rdev->new_fw = true;
2304         }
2305
2306 out:
2307         if (err) {
2308                 if (err != -EINVAL)
2309                         printk(KERN_ERR
2310                                "cik_cp: Failed to load firmware \"%s\"\n",
2311                                fw_name);
2312                 release_firmware(rdev->pfp_fw);
2313                 rdev->pfp_fw = NULL;
2314                 release_firmware(rdev->me_fw);
2315                 rdev->me_fw = NULL;
2316                 release_firmware(rdev->ce_fw);
2317                 rdev->ce_fw = NULL;
2318                 release_firmware(rdev->mec_fw);
2319                 rdev->mec_fw = NULL;
2320                 release_firmware(rdev->mec2_fw);
2321                 rdev->mec2_fw = NULL;
2322                 release_firmware(rdev->rlc_fw);
2323                 rdev->rlc_fw = NULL;
2324                 release_firmware(rdev->sdma_fw);
2325                 rdev->sdma_fw = NULL;
2326                 release_firmware(rdev->mc_fw);
2327                 rdev->mc_fw = NULL;
2328                 release_firmware(rdev->smc_fw);
2329                 rdev->smc_fw = NULL;
2330         }
2331         return err;
2332 }
2333
2334 /*
2335  * Core functions
2336  */
2337 /**
2338  * cik_tiling_mode_table_init - init the hw tiling table
2339  *
2340  * @rdev: radeon_device pointer
2341  *
2342  * Starting with SI, the tiling setup is done globally in a
2343  * set of 32 tiling modes.  Rather than selecting each set of
2344  * parameters per surface as on older asics, we just select
2345  * which index in the tiling table we want to use, and the
2346  * surface uses those parameters (CIK).
2347  */
2348 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2349 {
2350         u32 *tile = rdev->config.cik.tile_mode_array;
2351         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2352         const u32 num_tile_mode_states =
2353                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2354         const u32 num_secondary_tile_mode_states =
2355                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2356         u32 reg_offset, split_equal_to_row_size;
2357         u32 num_pipe_configs;
2358         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2359                 rdev->config.cik.max_shader_engines;
2360
2361         switch (rdev->config.cik.mem_row_size_in_kb) {
2362         case 1:
2363                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2364                 break;
2365         case 2:
2366         default:
2367                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2368                 break;
2369         case 4:
2370                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2371                 break;
2372         }
2373
2374         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2375         if (num_pipe_configs > 8)
2376                 num_pipe_configs = 16;
2377
2378         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2379                 tile[reg_offset] = 0;
2380         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2381                 macrotile[reg_offset] = 0;
2382
2383         switch(num_pipe_configs) {
2384         case 16:
2385                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2389                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2393                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2401                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                            TILE_SPLIT(split_equal_to_row_size));
2405                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2412                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2413                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2414                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                            TILE_SPLIT(split_equal_to_row_size));
2416                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2417                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2418                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2421                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2436                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2451                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2458                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463
2464                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467                            NUM_BANKS(ADDR_SURF_16_BANK));
2468                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471                            NUM_BANKS(ADDR_SURF_16_BANK));
2472                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                            NUM_BANKS(ADDR_SURF_16_BANK));
2476                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479                            NUM_BANKS(ADDR_SURF_16_BANK));
2480                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                            NUM_BANKS(ADDR_SURF_8_BANK));
2484                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                            NUM_BANKS(ADDR_SURF_4_BANK));
2488                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491                            NUM_BANKS(ADDR_SURF_2_BANK));
2492                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2494                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                            NUM_BANKS(ADDR_SURF_16_BANK));
2496                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2498                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499                            NUM_BANKS(ADDR_SURF_16_BANK));
2500                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503                             NUM_BANKS(ADDR_SURF_16_BANK));
2504                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                             NUM_BANKS(ADDR_SURF_8_BANK));
2508                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511                             NUM_BANKS(ADDR_SURF_4_BANK));
2512                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515                             NUM_BANKS(ADDR_SURF_2_BANK));
2516                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                             NUM_BANKS(ADDR_SURF_2_BANK));
2520
2521                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2522                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2523                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2524                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2525                 break;
2526
2527         case 8:
2528                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2532                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2536                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2544                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                            TILE_SPLIT(split_equal_to_row_size));
2548                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2551                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2556                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558                            TILE_SPLIT(split_equal_to_row_size));
2559                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2560                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2561                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2564                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2579                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2594                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2599                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2601                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2603                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606
2607                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610                                 NUM_BANKS(ADDR_SURF_16_BANK));
2611                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614                                 NUM_BANKS(ADDR_SURF_16_BANK));
2615                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618                                 NUM_BANKS(ADDR_SURF_16_BANK));
2619                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626                                 NUM_BANKS(ADDR_SURF_8_BANK));
2627                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630                                 NUM_BANKS(ADDR_SURF_4_BANK));
2631                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634                                 NUM_BANKS(ADDR_SURF_2_BANK));
2635                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654                                 NUM_BANKS(ADDR_SURF_8_BANK));
2655                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658                                 NUM_BANKS(ADDR_SURF_4_BANK));
2659                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662                                 NUM_BANKS(ADDR_SURF_2_BANK));
2663
2664                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2665                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2666                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2667                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2668                 break;
2669
2670         case 4:
2671                 if (num_rbs == 4) {
2672                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2676                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2680                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2688                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                            TILE_SPLIT(split_equal_to_row_size));
2692                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2699                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2700                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2701                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                            TILE_SPLIT(split_equal_to_row_size));
2703                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2704                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2705                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2708                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2723                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2738                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2739                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750
2751                 } else if (num_rbs < 4) {
2752                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2756                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2760                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2768                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                            TILE_SPLIT(split_equal_to_row_size));
2772                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2775                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2780                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782                            TILE_SPLIT(split_equal_to_row_size));
2783                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2784                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2785                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2788                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2803                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2818                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2827                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2828                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830                 }
2831
2832                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839                                 NUM_BANKS(ADDR_SURF_16_BANK));
2840                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843                                 NUM_BANKS(ADDR_SURF_16_BANK));
2844                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_8_BANK));
2856                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859                                 NUM_BANKS(ADDR_SURF_4_BANK));
2860                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863                                 NUM_BANKS(ADDR_SURF_16_BANK));
2864                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867                                 NUM_BANKS(ADDR_SURF_16_BANK));
2868                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871                                 NUM_BANKS(ADDR_SURF_16_BANK));
2872                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875                                 NUM_BANKS(ADDR_SURF_16_BANK));
2876                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                 NUM_BANKS(ADDR_SURF_16_BANK));
2880                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883                                 NUM_BANKS(ADDR_SURF_8_BANK));
2884                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887                                 NUM_BANKS(ADDR_SURF_4_BANK));
2888
2889                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2891                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2892                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2893                 break;
2894
2895         case 2:
2896                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898                            PIPE_CONFIG(ADDR_SURF_P2) |
2899                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2900                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902                            PIPE_CONFIG(ADDR_SURF_P2) |
2903                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2904                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                            PIPE_CONFIG(ADDR_SURF_P2) |
2911                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2912                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914                            PIPE_CONFIG(ADDR_SURF_P2) |
2915                            TILE_SPLIT(split_equal_to_row_size));
2916                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917                            PIPE_CONFIG(ADDR_SURF_P2) |
2918                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921                            PIPE_CONFIG(ADDR_SURF_P2) |
2922                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2923                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2925                            PIPE_CONFIG(ADDR_SURF_P2) |
2926                            TILE_SPLIT(split_equal_to_row_size));
2927                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2928                            PIPE_CONFIG(ADDR_SURF_P2);
2929                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931                            PIPE_CONFIG(ADDR_SURF_P2));
2932                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934                             PIPE_CONFIG(ADDR_SURF_P2) |
2935                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938                             PIPE_CONFIG(ADDR_SURF_P2) |
2939                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                             PIPE_CONFIG(ADDR_SURF_P2) |
2943                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945                             PIPE_CONFIG(ADDR_SURF_P2) |
2946                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2947                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949                             PIPE_CONFIG(ADDR_SURF_P2) |
2950                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                             PIPE_CONFIG(ADDR_SURF_P2) |
2958                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2960                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2961                             PIPE_CONFIG(ADDR_SURF_P2));
2962                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2963                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964                             PIPE_CONFIG(ADDR_SURF_P2) |
2965                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968                             PIPE_CONFIG(ADDR_SURF_P2) |
2969                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972                             PIPE_CONFIG(ADDR_SURF_P2) |
2973                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974
2975                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                 NUM_BANKS(ADDR_SURF_16_BANK));
2979                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2980                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                 NUM_BANKS(ADDR_SURF_16_BANK));
2983                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986                                 NUM_BANKS(ADDR_SURF_16_BANK));
2987                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002                                 NUM_BANKS(ADDR_SURF_8_BANK));
3003                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3005                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006                                 NUM_BANKS(ADDR_SURF_16_BANK));
3007                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030                                 NUM_BANKS(ADDR_SURF_8_BANK));
3031
3032                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3033                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3034                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3035                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3036                 break;
3037
3038         default:
3039                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3040         }
3041 }
3042
3043 /**
3044  * cik_select_se_sh - select which SE, SH to address
3045  *
3046  * @rdev: radeon_device pointer
3047  * @se_num: shader engine to address
3048  * @sh_num: sh block to address
3049  *
3050  * Select which SE, SH combinations to address. Certain
3051  * registers are instanced per SE or SH.  0xffffffff means
3052  * broadcast to all SEs or SHs (CIK).
3053  */
3054 static void cik_select_se_sh(struct radeon_device *rdev,
3055                              u32 se_num, u32 sh_num)
3056 {
3057         u32 data = INSTANCE_BROADCAST_WRITES;
3058
3059         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3060                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3061         else if (se_num == 0xffffffff)
3062                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3063         else if (sh_num == 0xffffffff)
3064                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3065         else
3066                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3067         WREG32(GRBM_GFX_INDEX, data);
3068 }
3069
3070 /**
3071  * cik_create_bitmask - create a bitmask
3072  *
3073  * @bit_width: length of the mask
3074  *
3075  * create a variable length bit mask (CIK).
3076  * Returns the bitmask.
3077  */
3078 static u32 cik_create_bitmask(u32 bit_width)
3079 {
3080         u32 i, mask = 0;
3081
3082         for (i = 0; i < bit_width; i++) {
3083                 mask <<= 1;
3084                 mask |= 1;
3085         }
3086         return mask;
3087 }
3088
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
3100 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3101                               u32 max_rb_num_per_se,
3102                               u32 sh_per_se)
3103 {
3104         u32 data, mask;
3105
3106         data = RREG32(CC_RB_BACKEND_DISABLE);
3107         if (data & 1)
3108                 data &= BACKEND_DISABLE_MASK;
3109         else
3110                 data = 0;
3111         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3112
3113         data >>= BACKEND_DISABLE_SHIFT;
3114
3115         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3116
3117         return data & mask;
3118 }
3119
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH pair and pack each pair's disabled-RB mask into
	 * a single dword.  Hawaii uses a wider per-SH bitmap slot than the
	 * other CIK parts.  grbm_idx_mutex serializes GRBM_GFX_INDEX
	 * selection against other users. */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing before releasing the lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* Invert the packed mask: a RB is enabled iff it is not disabled. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* cache the enabled-RB mask in the asic config */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program PA_SC_RASTER_CONFIG per SE.  Each SH consumes two bits of
	 * enabled_rbs; the raster config map chosen depends on which of the
	 * (up to) two RBs in that SH survived. */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RBs enabled in this SH */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the first RB enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3192
3193 /**
3194  * cik_gpu_init - setup the 3D engine
3195  *
3196  * @rdev: radeon_device pointer
3197  *
3198  * Configures the 3D engine and tiling configuration
3199  * registers so that the 3D engine is usable.
3200  */
3201 static void cik_gpu_init(struct radeon_device *rdev)
3202 {
3203         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3204         u32 mc_shared_chmap, mc_arb_ramcfg;
3205         u32 hdp_host_path_cntl;
3206         u32 tmp;
3207         int i, j;
3208
3209         switch (rdev->family) {
3210         case CHIP_BONAIRE:
3211                 rdev->config.cik.max_shader_engines = 2;
3212                 rdev->config.cik.max_tile_pipes = 4;
3213                 rdev->config.cik.max_cu_per_sh = 7;
3214                 rdev->config.cik.max_sh_per_se = 1;
3215                 rdev->config.cik.max_backends_per_se = 2;
3216                 rdev->config.cik.max_texture_channel_caches = 4;
3217                 rdev->config.cik.max_gprs = 256;
3218                 rdev->config.cik.max_gs_threads = 32;
3219                 rdev->config.cik.max_hw_contexts = 8;
3220
3221                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3222                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3223                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3224                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3225                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3226                 break;
3227         case CHIP_HAWAII:
3228                 rdev->config.cik.max_shader_engines = 4;
3229                 rdev->config.cik.max_tile_pipes = 16;
3230                 rdev->config.cik.max_cu_per_sh = 11;
3231                 rdev->config.cik.max_sh_per_se = 1;
3232                 rdev->config.cik.max_backends_per_se = 4;
3233                 rdev->config.cik.max_texture_channel_caches = 16;
3234                 rdev->config.cik.max_gprs = 256;
3235                 rdev->config.cik.max_gs_threads = 32;
3236                 rdev->config.cik.max_hw_contexts = 8;
3237
3238                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3239                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3240                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3241                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3242                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3243                 break;
3244         case CHIP_KAVERI:
3245                 rdev->config.cik.max_shader_engines = 1;
3246                 rdev->config.cik.max_tile_pipes = 4;
3247                 if ((rdev->pdev->device == 0x1304) ||
3248                     (rdev->pdev->device == 0x1305) ||
3249                     (rdev->pdev->device == 0x130C) ||
3250                     (rdev->pdev->device == 0x130F) ||
3251                     (rdev->pdev->device == 0x1310) ||
3252                     (rdev->pdev->device == 0x1311) ||
3253                     (rdev->pdev->device == 0x131C)) {
3254                         rdev->config.cik.max_cu_per_sh = 8;
3255                         rdev->config.cik.max_backends_per_se = 2;
3256                 } else if ((rdev->pdev->device == 0x1309) ||
3257                            (rdev->pdev->device == 0x130A) ||
3258                            (rdev->pdev->device == 0x130D) ||
3259                            (rdev->pdev->device == 0x1313) ||
3260                            (rdev->pdev->device == 0x131D)) {
3261                         rdev->config.cik.max_cu_per_sh = 6;
3262                         rdev->config.cik.max_backends_per_se = 2;
3263                 } else if ((rdev->pdev->device == 0x1306) ||
3264                            (rdev->pdev->device == 0x1307) ||
3265                            (rdev->pdev->device == 0x130B) ||
3266                            (rdev->pdev->device == 0x130E) ||
3267                            (rdev->pdev->device == 0x1315) ||
3268                            (rdev->pdev->device == 0x1318) ||
3269                            (rdev->pdev->device == 0x131B)) {
3270                         rdev->config.cik.max_cu_per_sh = 4;
3271                         rdev->config.cik.max_backends_per_se = 1;
3272                 } else {
3273                         rdev->config.cik.max_cu_per_sh = 3;
3274                         rdev->config.cik.max_backends_per_se = 1;
3275                 }
3276                 rdev->config.cik.max_sh_per_se = 1;
3277                 rdev->config.cik.max_texture_channel_caches = 4;
3278                 rdev->config.cik.max_gprs = 256;
3279                 rdev->config.cik.max_gs_threads = 16;
3280                 rdev->config.cik.max_hw_contexts = 8;
3281
3282                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3283                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3284                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3285                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3286                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3287                 break;
3288         case CHIP_KABINI:
3289         case CHIP_MULLINS:
3290         default:
3291                 rdev->config.cik.max_shader_engines = 1;
3292                 rdev->config.cik.max_tile_pipes = 2;
3293                 rdev->config.cik.max_cu_per_sh = 2;
3294                 rdev->config.cik.max_sh_per_se = 1;
3295                 rdev->config.cik.max_backends_per_se = 1;
3296                 rdev->config.cik.max_texture_channel_caches = 2;
3297                 rdev->config.cik.max_gprs = 256;
3298                 rdev->config.cik.max_gs_threads = 16;
3299                 rdev->config.cik.max_hw_contexts = 8;
3300
3301                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3302                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3303                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3304                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3305                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3306                 break;
3307         }
3308
3309         /* Initialize HDP */
3310         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3311                 WREG32((0x2c14 + j), 0x00000000);
3312                 WREG32((0x2c18 + j), 0x00000000);
3313                 WREG32((0x2c1c + j), 0x00000000);
3314                 WREG32((0x2c20 + j), 0x00000000);
3315                 WREG32((0x2c24 + j), 0x00000000);
3316         }
3317
3318         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3319         WREG32(SRBM_INT_CNTL, 0x1);
3320         WREG32(SRBM_INT_ACK, 0x1);
3321
3322         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3323
3324         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3325         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3326
3327         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3328         rdev->config.cik.mem_max_burst_length_bytes = 256;
3329         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3330         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3331         if (rdev->config.cik.mem_row_size_in_kb > 4)
3332                 rdev->config.cik.mem_row_size_in_kb = 4;
3333         /* XXX use MC settings? */
3334         rdev->config.cik.shader_engine_tile_size = 32;
3335         rdev->config.cik.num_gpus = 1;
3336         rdev->config.cik.multi_gpu_tile_size = 64;
3337
3338         /* fix up row size */
3339         gb_addr_config &= ~ROW_SIZE_MASK;
3340         switch (rdev->config.cik.mem_row_size_in_kb) {
3341         case 1:
3342         default:
3343                 gb_addr_config |= ROW_SIZE(0);
3344                 break;
3345         case 2:
3346                 gb_addr_config |= ROW_SIZE(1);
3347                 break;
3348         case 4:
3349                 gb_addr_config |= ROW_SIZE(2);
3350                 break;
3351         }
3352
3353         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3354          * not have bank info, so create a custom tiling dword.
3355          * bits 3:0   num_pipes
3356          * bits 7:4   num_banks
3357          * bits 11:8  group_size
3358          * bits 15:12 row_size
3359          */
3360         rdev->config.cik.tile_config = 0;
3361         switch (rdev->config.cik.num_tile_pipes) {
3362         case 1:
3363                 rdev->config.cik.tile_config |= (0 << 0);
3364                 break;
3365         case 2:
3366                 rdev->config.cik.tile_config |= (1 << 0);
3367                 break;
3368         case 4:
3369                 rdev->config.cik.tile_config |= (2 << 0);
3370                 break;
3371         case 8:
3372         default:
3373                 /* XXX what about 12? */
3374                 rdev->config.cik.tile_config |= (3 << 0);
3375                 break;
3376         }
3377         rdev->config.cik.tile_config |=
3378                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3379         rdev->config.cik.tile_config |=
3380                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3381         rdev->config.cik.tile_config |=
3382                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3383
3384         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3385         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3386         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3387         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3388         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3389         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3390         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3391         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3392
3393         cik_tiling_mode_table_init(rdev);
3394
3395         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3396                      rdev->config.cik.max_sh_per_se,
3397                      rdev->config.cik.max_backends_per_se);
3398
3399         rdev->config.cik.active_cus = 0;
3400         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3401                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3402                         rdev->config.cik.active_cus +=
3403                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3404                 }
3405         }
3406
3407         /* set HW defaults for 3D engine */
3408         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3409
3410         mutex_lock(&rdev->grbm_idx_mutex);
3411         /*
3412          * making sure that the following register writes will be broadcasted
3413          * to all the shaders
3414          */
3415         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3416         WREG32(SX_DEBUG_1, 0x20);
3417
3418         WREG32(TA_CNTL_AUX, 0x00010000);
3419
3420         tmp = RREG32(SPI_CONFIG_CNTL);
3421         tmp |= 0x03000000;
3422         WREG32(SPI_CONFIG_CNTL, tmp);
3423
3424         WREG32(SQ_CONFIG, 1);
3425
3426         WREG32(DB_DEBUG, 0);
3427
3428         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3429         tmp |= 0x00000400;
3430         WREG32(DB_DEBUG2, tmp);
3431
3432         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3433         tmp |= 0x00020200;
3434         WREG32(DB_DEBUG3, tmp);
3435
3436         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3437         tmp |= 0x00018208;
3438         WREG32(CB_HW_CONTROL, tmp);
3439
3440         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3441
3442         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3443                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3444                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3445                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3446
3447         WREG32(VGT_NUM_INSTANCES, 1);
3448
3449         WREG32(CP_PERFMON_CNTL, 0);
3450
3451         WREG32(SQ_CONFIG, 0);
3452
3453         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3454                                           FORCE_EOV_MAX_REZ_CNT(255)));
3455
3456         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3457                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3458
3459         WREG32(VGT_GS_VERTEX_REUSE, 16);
3460         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3461
3462         tmp = RREG32(HDP_MISC_CNTL);
3463         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3464         WREG32(HDP_MISC_CNTL, tmp);
3465
3466         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3467         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3468
3469         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3470         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3471         mutex_unlock(&rdev->grbm_idx_mutex);
3472
3473         udelay(50);
3474 }
3475
3476 /*
3477  * GPU scratch registers helpers function.
3478  */
3479 /**
3480  * cik_scratch_init - setup driver info for CP scratch regs
3481  *
3482  * @rdev: radeon_device pointer
3483  *
3484  * Set up the number and offset of the CP scratch registers.
3485  * NOTE: use of CP scratch registers is a legacy inferface and
3486  * is not used by default on newer asics (r6xx+).  On newer asics,
3487  * memory buffers are used for fences rather than scratch regs.
3488  */
3489 static void cik_scratch_init(struct radeon_device *rdev)
3490 {
3491         int i;
3492
3493         rdev->scratch.num_reg = 7;
3494         rdev->scratch.reg_base = SCRATCH_REG0;
3495         for (i = 0; i < rdev->scratch.num_reg; i++) {
3496                 rdev->scratch.free[i] = true;
3497                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3498         }
3499 }
3500
3501 /**
3502  * cik_ring_test - basic gfx ring test
3503  *
3504  * @rdev: radeon_device pointer
3505  * @ring: radeon_ring structure holding ring information
3506  *
3507  * Allocate a scratch register and write to it using the gfx ring (CIK).
3508  * Provides a basic gfx ring test to verify that the ring is working.
3509  * Used by cik_cp_gfx_resume();
3510  * Returns 0 on success, error on failure.
3511  */
3512 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3513 {
3514         uint32_t scratch;
3515         uint32_t tmp = 0;
3516         unsigned i;
3517         int r;
3518
3519         r = radeon_scratch_get(rdev, &scratch);
3520         if (r) {
3521                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3522                 return r;
3523         }
3524         WREG32(scratch, 0xCAFEDEAD);
3525         r = radeon_ring_lock(rdev, ring, 3);
3526         if (r) {
3527                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3528                 radeon_scratch_free(rdev, scratch);
3529                 return r;
3530         }
3531         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3532         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3533         radeon_ring_write(ring, 0xDEADBEEF);
3534         radeon_ring_unlock_commit(rdev, ring, false);
3535
3536         for (i = 0; i < rdev->usec_timeout; i++) {
3537                 tmp = RREG32(scratch);
3538                 if (tmp == 0xDEADBEEF)
3539                         break;
3540                 DRM_UDELAY(1);
3541         }
3542         if (i < rdev->usec_timeout) {
3543                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3544         } else {
3545                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3546                           ring->idx, scratch, tmp);
3547                 r = -EINVAL;
3548         }
3549         radeon_scratch_free(rdev, scratch);
3550         return r;
3551 }
3552
3553 /**
3554  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3555  *
3556  * @rdev: radeon_device pointer
3557  * @ridx: radeon ring index
3558  *
3559  * Emits an hdp flush on the cp.
3560  */
3561 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3562                                        int ridx)
3563 {
3564         struct radeon_ring *ring = &rdev->ring[ridx];
3565         u32 ref_and_mask;
3566
3567         switch (ring->idx) {
3568         case CAYMAN_RING_TYPE_CP1_INDEX:
3569         case CAYMAN_RING_TYPE_CP2_INDEX:
3570         default:
3571                 switch (ring->me) {
3572                 case 0:
3573                         ref_and_mask = CP2 << ring->pipe;
3574                         break;
3575                 case 1:
3576                         ref_and_mask = CP6 << ring->pipe;
3577                         break;
3578                 default:
3579                         return;
3580                 }
3581                 break;
3582         case RADEON_RING_TYPE_GFX_INDEX:
3583                 ref_and_mask = CP0;
3584                 break;
3585         }
3586
3587         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3588         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3589                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3590                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3591         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3592         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3593         radeon_ring_write(ring, ref_and_mask);
3594         radeon_ring_write(ring, ref_and_mask);
3595         radeon_ring_write(ring, 0x20); /* poll interval */
3596 }
3597
3598 /**
3599  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3600  *
3601  * @rdev: radeon_device pointer
3602  * @fence: radeon fence object
3603  *
3604  * Emits a fence sequnce number on the gfx ring and flushes
3605  * GPU caches.
3606  */
3607 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3608                              struct radeon_fence *fence)
3609 {
3610         struct radeon_ring *ring = &rdev->ring[fence->ring];
3611         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3612
3613         /* Workaround for cache flush problems. First send a dummy EOP
3614          * event down the pipe with seq one below.
3615          */
3616         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3617         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3618                                  EOP_TC_ACTION_EN |
3619                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3620                                  EVENT_INDEX(5)));
3621         radeon_ring_write(ring, addr & 0xfffffffc);
3622         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3623                                 DATA_SEL(1) | INT_SEL(0));
3624         radeon_ring_write(ring, fence->seq - 1);
3625         radeon_ring_write(ring, 0);
3626
3627         /* Then send the real EOP event down the pipe. */
3628         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3629         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3630                                  EOP_TC_ACTION_EN |
3631                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3632                                  EVENT_INDEX(5)));
3633         radeon_ring_write(ring, addr & 0xfffffffc);
3634         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3635         radeon_ring_write(ring, fence->seq);
3636         radeon_ring_write(ring, 0);
3637 }
3638
3639 /**
3640  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3641  *
3642  * @rdev: radeon_device pointer
3643  * @fence: radeon fence object
3644  *
3645  * Emits a fence sequnce number on the compute ring and flushes
3646  * GPU caches.
3647  */
3648 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3649                                  struct radeon_fence *fence)
3650 {
3651         struct radeon_ring *ring = &rdev->ring[fence->ring];
3652         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3653
3654         /* RELEASE_MEM - flush caches, send int */
3655         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3656         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3657                                  EOP_TC_ACTION_EN |
3658                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3659                                  EVENT_INDEX(5)));
3660         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3661         radeon_ring_write(ring, addr & 0xfffffffc);
3662         radeon_ring_write(ring, upper_32_bits(addr));
3663         radeon_ring_write(ring, fence->seq);
3664         radeon_ring_write(ring, 0);
3665 }
3666
3667 /**
3668  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3669  *
3670  * @rdev: radeon_device pointer
3671  * @ring: radeon ring buffer object
3672  * @semaphore: radeon semaphore object
3673  * @emit_wait: Is this a sempahore wait?
3674  *
3675  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3676  * from running ahead of semaphore waits.
3677  */
3678 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3679                              struct radeon_ring *ring,
3680                              struct radeon_semaphore *semaphore,
3681                              bool emit_wait)
3682 {
3683         uint64_t addr = semaphore->gpu_addr;
3684         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3685
3686         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3687         radeon_ring_write(ring, lower_32_bits(addr));
3688         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3689
3690         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3691                 /* Prevent the PFP from running ahead of the semaphore wait */
3692                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3693                 radeon_ring_write(ring, 0x0);
3694         }
3695
3696         return true;
3697 }
3698
3699 /**
3700  * cik_copy_cpdma - copy pages using the CP DMA engine
3701  *
3702  * @rdev: radeon_device pointer
3703  * @src_offset: src GPU address
3704  * @dst_offset: dst GPU address
3705  * @num_gpu_pages: number of GPU pages to xfer
3706  * @resv: reservation object to sync to
3707  *
3708  * Copy GPU paging using the CP DMA engine (CIK+).
3709  * Used by the radeon ttm implementation to move pages if
3710  * registered as the asic copy callback.
3711  */
3712 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3713                                     uint64_t src_offset, uint64_t dst_offset,
3714                                     unsigned num_gpu_pages,
3715                                     struct reservation_object *resv)
3716 {
3717         struct radeon_fence *fence;
3718         struct radeon_sync sync;
3719         int ring_index = rdev->asic->copy.blit_ring_index;
3720         struct radeon_ring *ring = &rdev->ring[ring_index];
3721         u32 size_in_bytes, cur_size_in_bytes, control;
3722         int i, num_loops;
3723         int r = 0;
3724
3725         radeon_sync_create(&sync);
3726
3727         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3728         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3729         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3730         if (r) {
3731                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3732                 radeon_sync_free(rdev, &sync, NULL);
3733                 return ERR_PTR(r);
3734         }
3735
3736         radeon_sync_resv(rdev, &sync, resv, false);
3737         radeon_sync_rings(rdev, &sync, ring->idx);
3738
3739         for (i = 0; i < num_loops; i++) {
3740                 cur_size_in_bytes = size_in_bytes;
3741                 if (cur_size_in_bytes > 0x1fffff)
3742                         cur_size_in_bytes = 0x1fffff;
3743                 size_in_bytes -= cur_size_in_bytes;
3744                 control = 0;
3745                 if (size_in_bytes == 0)
3746                         control |= PACKET3_DMA_DATA_CP_SYNC;
3747                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3748                 radeon_ring_write(ring, control);
3749                 radeon_ring_write(ring, lower_32_bits(src_offset));
3750                 radeon_ring_write(ring, upper_32_bits(src_offset));
3751                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3752                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3753                 radeon_ring_write(ring, cur_size_in_bytes);
3754                 src_offset += cur_size_in_bytes;
3755                 dst_offset += cur_size_in_bytes;
3756         }
3757
3758         r = radeon_fence_emit(rdev, &fence, ring->idx);
3759         if (r) {
3760                 radeon_ring_unlock_undo(rdev, ring);
3761                 radeon_sync_free(rdev, &sync, NULL);
3762                 return ERR_PTR(r);
3763         }
3764
3765         radeon_ring_unlock_commit(rdev, ring, false);
3766         radeon_sync_free(rdev, &sync, fence);
3767
3768         return fence;
3769 }
3770
3771 /*
3772  * IB stuff
3773  */
3774 /**
3775  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3776  *
3777  * @rdev: radeon_device pointer
3778  * @ib: radeon indirect buffer object
3779  *
3780  * Emits a DE (drawing engine) or CE (constant engine) IB
3781  * on the gfx ring.  IBs are usually generated by userspace
3782  * acceleration drivers and submitted to the kernel for
3783  * scheduling on the ring.  This function schedules the IB
3784  * on the gfx ring for execution by the GPU.
3785  */
3786 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3787 {
3788         struct radeon_ring *ring = &rdev->ring[ib->ring];
3789         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3790         u32 header, control = INDIRECT_BUFFER_VALID;
3791
3792         if (ib->is_const_ib) {
3793                 /* set switch buffer packet before const IB */
3794                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3795                 radeon_ring_write(ring, 0);
3796
3797                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3798         } else {
3799                 u32 next_rptr;
3800                 if (ring->rptr_save_reg) {
3801                         next_rptr = ring->wptr + 3 + 4;
3802                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3803                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3804                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3805                         radeon_ring_write(ring, next_rptr);
3806                 } else if (rdev->wb.enabled) {
3807                         next_rptr = ring->wptr + 5 + 4;
3808                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3809                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3810                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3811                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3812                         radeon_ring_write(ring, next_rptr);
3813                 }
3814
3815                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3816         }
3817
3818         control |= ib->length_dw | (vm_id << 24);
3819
3820         radeon_ring_write(ring, header);
3821         radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3822         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3823         radeon_ring_write(ring, control);
3824 }
3825
3826 /**
3827  * cik_ib_test - basic gfx ring IB test
3828  *
3829  * @rdev: radeon_device pointer
3830  * @ring: radeon_ring structure holding ring information
3831  *
3832  * Allocate an IB and execute it on the gfx ring (CIK).
3833  * Provides a basic gfx ring test to verify that IBs are working.
3834  * Returns 0 on success, error on failure.
3835  */
3836 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3837 {
3838         struct radeon_ib ib;
3839         uint32_t scratch;
3840         uint32_t tmp = 0;
3841         unsigned i;
3842         int r;
3843
3844         r = radeon_scratch_get(rdev, &scratch);
3845         if (r) {
3846                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3847                 return r;
3848         }
3849         WREG32(scratch, 0xCAFEDEAD);
3850         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3851         if (r) {
3852                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3853                 radeon_scratch_free(rdev, scratch);
3854                 return r;
3855         }
3856         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3857         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3858         ib.ptr[2] = 0xDEADBEEF;
3859         ib.length_dw = 3;
3860         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3861         if (r) {
3862                 radeon_scratch_free(rdev, scratch);
3863                 radeon_ib_free(rdev, &ib);
3864                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3865                 return r;
3866         }
3867         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3868                 RADEON_USEC_IB_TEST_TIMEOUT));
3869         if (r < 0) {
3870                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3871                 radeon_scratch_free(rdev, scratch);
3872                 radeon_ib_free(rdev, &ib);
3873                 return r;
3874         } else if (r == 0) {
3875                 DRM_ERROR("radeon: fence wait timed out.\n");
3876                 radeon_scratch_free(rdev, scratch);
3877                 radeon_ib_free(rdev, &ib);
3878                 return -ETIMEDOUT;
3879         }
3880         r = 0;
3881         for (i = 0; i < rdev->usec_timeout; i++) {
3882                 tmp = RREG32(scratch);
3883                 if (tmp == 0xDEADBEEF)
3884                         break;
3885                 DRM_UDELAY(1);
3886         }
3887         if (i < rdev->usec_timeout) {
3888                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3889         } else {
3890                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3891                           scratch, tmp);
3892                 r = -EINVAL;
3893         }
3894         radeon_scratch_free(rdev, scratch);
3895         radeon_ib_free(rdev, &ib);
3896         return r;
3897 }
3898
3899 /*
3900  * CP.
 * On CIK, gfx and compute now have independent command processors.
3902  *
3903  * GFX
3904  * Gfx consists of a single ring and can process both gfx jobs and
3905  * compute jobs.  The gfx CP consists of three microengines (ME):
3906  * PFP - Pre-Fetch Parser
3907  * ME - Micro Engine
3908  * CE - Constant Engine
3909  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3911  * used by the DE so that they can be loaded into cache in parallel
3912  * while the DE is processing state update packets.
3913  *
3914  * Compute
3915  * The compute CP consists of two microengines (ME):
3916  * MEC1 - Compute MicroEngine 1
3917  * MEC2 - Compute MicroEngine 2
3918  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3919  * The queues are exposed to userspace and are programmed directly
3920  * by the compute runtime.
3921  */
3922 /**
3923  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3924  *
3925  * @rdev: radeon_device pointer
3926  * @enable: enable or disable the MEs
3927  *
3928  * Halts or unhalts the gfx MEs.
3929  */
3930 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3931 {
3932         if (enable)
3933                 WREG32(CP_ME_CNTL, 0);
3934         else {
3935                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3936                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3937                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3938                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3939         }
3940         udelay(50);
3941 }
3942
3943 /**
3944  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3945  *
3946  * @rdev: radeon_device pointer
3947  *
3948  * Loads the gfx PFP, ME, and CE ucode.
3949  * Returns 0 for success, -EINVAL if the ucode is not available.
3950  */
3951 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3952 {
3953         int i;
3954
3955         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3956                 return -EINVAL;
3957
3958         cik_cp_gfx_enable(rdev, false);
3959
3960         if (rdev->new_fw) {
3961                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3962                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3963                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3964                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3965                 const struct gfx_firmware_header_v1_0 *me_hdr =
3966                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3967                 const __le32 *fw_data;
3968                 u32 fw_size;
3969
3970                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3971                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3972                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3973
3974                 /* PFP */
3975                 fw_data = (const __le32 *)
3976                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3977                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3978                 WREG32(CP_PFP_UCODE_ADDR, 0);
3979                 for (i = 0; i < fw_size; i++)
3980                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3981                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3982
3983                 /* CE */
3984                 fw_data = (const __le32 *)
3985                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3986                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3987                 WREG32(CP_CE_UCODE_ADDR, 0);
3988                 for (i = 0; i < fw_size; i++)
3989                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3990                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3991
3992                 /* ME */
3993                 fw_data = (const __be32 *)
3994                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3995                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3996                 WREG32(CP_ME_RAM_WADDR, 0);
3997                 for (i = 0; i < fw_size; i++)
3998                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3999                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4000                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4001         } else {
4002                 const __be32 *fw_data;
4003
4004                 /* PFP */
4005                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4006                 WREG32(CP_PFP_UCODE_ADDR, 0);
4007                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4008                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4009                 WREG32(CP_PFP_UCODE_ADDR, 0);
4010
4011                 /* CE */
4012                 fw_data = (const __be32 *)rdev->ce_fw->data;
4013                 WREG32(CP_CE_UCODE_ADDR, 0);
4014                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4015                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4016                 WREG32(CP_CE_UCODE_ADDR, 0);
4017
4018                 /* ME */
4019                 fw_data = (const __be32 *)rdev->me_fw->data;
4020                 WREG32(CP_ME_RAM_WADDR, 0);
4021                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4022                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4023                 WREG32(CP_ME_RAM_WADDR, 0);
4024         }
4025
4026         return 0;
4027 }
4028
4029 /**
4030  * cik_cp_gfx_start - start the gfx ring
4031  *
4032  * @rdev: radeon_device pointer
4033  *
4034  * Enables the ring and loads the clear state context and other
4035  * packets required to init the ring.
4036  * Returns 0 for success, error for failure.
4037  */
4038 static int cik_cp_gfx_start(struct radeon_device *rdev)
4039 {
4040         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4041         int r, i;
4042
4043         /* init the CP */
4044         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4045         WREG32(CP_ENDIAN_SWAP, 0);
4046         WREG32(CP_DEVICE_ID, 1);
4047
4048         cik_cp_gfx_enable(rdev, true);
4049
4050         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4051         if (r) {
4052                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4053                 return r;
4054         }
4055
4056         /* init the CE partitions.  CE only used for gfx on CIK */
4057         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4058         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4059         radeon_ring_write(ring, 0x8000);
4060         radeon_ring_write(ring, 0x8000);
4061
4062         /* setup clear context state */
4063         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4064         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4065
4066         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4067         radeon_ring_write(ring, 0x80000000);
4068         radeon_ring_write(ring, 0x80000000);
4069
4070         for (i = 0; i < cik_default_size; i++)
4071                 radeon_ring_write(ring, cik_default_state[i]);
4072
4073         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4074         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4075
4076         /* set clear context state */
4077         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4078         radeon_ring_write(ring, 0);
4079
4080         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4081         radeon_ring_write(ring, 0x00000316);
4082         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4083         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4084
4085         radeon_ring_unlock_commit(rdev, ring, false);
4086
4087         return 0;
4088 }
4089
4090 /**
4091  * cik_cp_gfx_fini - stop the gfx ring
4092  *
4093  * @rdev: radeon_device pointer
4094  *
4095  * Stop the gfx ring and tear down the driver ring
4096  * info.
4097  */
4098 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4099 {
4100         cik_cp_gfx_enable(rdev, false);
4101         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102 }
4103
4104 /**
4105  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4106  *
4107  * @rdev: radeon_device pointer
4108  *
4109  * Program the location and size of the gfx ring buffer
4110  * and test it to make sure it's working.
4111  * Returns 0 for success, error for failure.
4112  */
4113 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4114 {
4115         struct radeon_ring *ring;
4116         u32 tmp;
4117         u32 rb_bufsz;
4118         u64 rb_addr;
4119         int r;
4120
4121         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4122         if (rdev->family != CHIP_HAWAII)
4123                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4124
4125         /* Set the write pointer delay */
4126         WREG32(CP_RB_WPTR_DELAY, 0);
4127
4128         /* set the RB to use vmid 0 */
4129         WREG32(CP_RB_VMID, 0);
4130
4131         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4132
4133         /* ring 0 - compute and gfx */
4134         /* Set ring buffer size */
4135         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4136         rb_bufsz = order_base_2(ring->ring_size / 8);
4137         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4138 #ifdef __BIG_ENDIAN
4139         tmp |= BUF_SWAP_32BIT;
4140 #endif
4141         WREG32(CP_RB0_CNTL, tmp);
4142
4143         /* Initialize the ring buffer's read and write pointers */
4144         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4145         ring->wptr = 0;
4146         WREG32(CP_RB0_WPTR, ring->wptr);
4147
4148         /* set the wb address wether it's enabled or not */
4149         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4150         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4151
4152         /* scratch register shadowing is no longer supported */
4153         WREG32(SCRATCH_UMSK, 0);
4154
4155         if (!rdev->wb.enabled)
4156                 tmp |= RB_NO_UPDATE;
4157
4158         mdelay(1);
4159         WREG32(CP_RB0_CNTL, tmp);
4160
4161         rb_addr = ring->gpu_addr >> 8;
4162         WREG32(CP_RB0_BASE, rb_addr);
4163         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4164
4165         /* start the ring */
4166         cik_cp_gfx_start(rdev);
4167         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4168         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4169         if (r) {
4170                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4171                 return r;
4172         }
4173
4174         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4175                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4176
4177         return 0;
4178 }
4179
4180 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4181                      struct radeon_ring *ring)
4182 {
4183         u32 rptr;
4184
4185         if (rdev->wb.enabled)
4186                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4187         else
4188                 rptr = RREG32(CP_RB0_RPTR);
4189
4190         return rptr;
4191 }
4192
4193 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4194                      struct radeon_ring *ring)
4195 {
4196         u32 wptr;
4197
4198         wptr = RREG32(CP_RB0_WPTR);
4199
4200         return wptr;
4201 }
4202
/**
 * cik_gfx_set_wptr - commit the gfx ring write pointer to the hardware
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Writes ring->wptr to CP_RB0_WPTR so the CP picks up newly
 * submitted work.
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted register write;
	 * the value is intentionally discarded */
	(void)RREG32(CP_RB0_WPTR);
}
4209
/**
 * cik_compute_get_rptr - get the current read pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Reads the rptr from the writeback buffer if enabled; otherwise
 * selects the ring's hardware queue via SRBM and reads
 * CP_HQD_PQ_RPTR directly.
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* the SRBM queue selection is global state, so
		 * serialize access with srbm_mutex */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		/* restore the default selection */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4227
/**
 * cik_compute_get_wptr - get the current write pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Reads the wptr from the writeback buffer if enabled; otherwise
 * selects the ring's hardware queue via SRBM and reads
 * CP_HQD_PQ_WPTR directly.
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* the SRBM queue selection is global state, so
		 * serialize access with srbm_mutex */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		/* restore the default selection */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4246
/**
 * cik_compute_set_wptr - commit a compute ring write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Updates the wptr shadow in the writeback buffer, then rings the
 * queue's doorbell to notify the CP of new work.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4254
/**
 * cik_compute_stop - dequeue and disable a compute hardware queue
 *
 * @rdev: radeon_device pointer
 * @ring: the compute ring whose HQD is being torn down
 *
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable):
 * this function switches the global SRBM selection to the ring's
 * queue and restores it on exit.  Disables wptr polling, requests a
 * dequeue and waits (bounded by rdev->usec_timeout microseconds) for
 * the HQD to go inactive, then clears the queue pointers.
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* busy-wait for the queue to drain */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4279
4280 /**
4281  * cik_cp_compute_enable - enable/disable the compute CP MEs
4282  *
4283  * @rdev: radeon_device pointer
4284  * @enable: enable or disable the MEs
4285  *
4286  * Halts or unhalts the compute MEs.
4287  */
4288 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4289 {
4290         if (enable)
4291                 WREG32(CP_MEC_CNTL, 0);
4292         else {
4293                 /*
4294                  * To make hibernation reliable we need to clear compute ring
4295                  * configuration before halting the compute ring.
4296                  */
4297                 mutex_lock(&rdev->srbm_mutex);
4298                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4299                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4300                 mutex_unlock(&rdev->srbm_mutex);
4301
4302                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4303                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4304                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4305         }
4306         udelay(50);
4307 }
4308
4309 /**
4310  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4311  *
4312  * @rdev: radeon_device pointer
4313  *
4314  * Loads the compute MEC1&2 ucode.
4315  * Returns 0 for success, -EINVAL if the ucode is not available.
4316  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	/* firmware must have been fetched earlier by the ucode loader */
	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before rewriting their ucode */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware image with a parsed header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): assumes rdev->mec2_fw is non-NULL on
			 * KAVERI whenever new_fw is set - confirm the ucode
			 * loader guarantees this before relying on it */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy big-endian firmware blob, fixed size, no header */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 - legacy images feed MEC2 from the same
			 * mec_fw blob as MEC1 */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4379
4380 /**
4381  * cik_cp_compute_start - start the compute queues
4382  *
4383  * @rdev: radeon_device pointer
4384  *
4385  * Enable the compute queues.
4386  * Returns 0 for success, error for failure.
4387  */
4388 static int cik_cp_compute_start(struct radeon_device *rdev)
4389 {
4390         cik_cp_compute_enable(rdev, true);
4391
4392         return 0;
4393 }
4394
4395 /**
4396  * cik_cp_compute_fini - stop the compute queues
4397  *
4398  * @rdev: radeon_device pointer
4399  *
4400  * Stop the compute queues and tear down the driver queue
4401  * info.
4402  */
4403 static void cik_cp_compute_fini(struct radeon_device *rdev)
4404 {
4405         int i, idx, r;
4406
4407         cik_cp_compute_enable(rdev, false);
4408
4409         for (i = 0; i < 2; i++) {
4410                 if (i == 0)
4411                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4412                 else
4413                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4414
4415                 if (rdev->ring[idx].mqd_obj) {
4416                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4417                         if (unlikely(r != 0))
4418                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4419
4420                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4421                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4422
4423                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4424                         rdev->ring[idx].mqd_obj = NULL;
4425                 }
4426         }
4427 }
4428
4429 static void cik_mec_fini(struct radeon_device *rdev)
4430 {
4431         int r;
4432
4433         if (rdev->mec.hpd_eop_obj) {
4434                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4435                 if (unlikely(r != 0))
4436                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4437                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4438                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4439
4440                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4441                 rdev->mec.hpd_eop_obj = NULL;
4442         }
4443 }
4444
4445 #define MEC_HPD_SIZE 2048
4446
4447 static int cik_mec_init(struct radeon_device *rdev)
4448 {
4449         int r;
4450         u32 *hpd;
4451
4452         /*
4453          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4454          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4455          * Nonetheless, we assign only 1 pipe because all other pipes will
4456          * be handled by KFD
4457          */
4458         rdev->mec.num_mec = 1;
4459         rdev->mec.num_pipe = 1;
4460         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4461
4462         if (rdev->mec.hpd_eop_obj == NULL) {
4463                 r = radeon_bo_create(rdev,
4464                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4465                                      PAGE_SIZE, true,
4466                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4467                                      &rdev->mec.hpd_eop_obj);
4468                 if (r) {
4469                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4470                         return r;
4471                 }
4472         }
4473
4474         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4475         if (unlikely(r != 0)) {
4476                 cik_mec_fini(rdev);
4477                 return r;
4478         }
4479         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4480                           &rdev->mec.hpd_eop_gpu_addr);
4481         if (r) {
4482                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4483                 cik_mec_fini(rdev);
4484                 return r;
4485         }
4486         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4487         if (r) {
4488                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4489                 cik_mec_fini(rdev);
4490                 return r;
4491         }
4492
4493         /* clear memory.  Not sure if this is required or not */
4494         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4495
4496         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4497         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4498
4499         return 0;
4500 }
4501
/*
 * CPU-side shadow of the per-queue CP_MQD_* and CP_HQD_* registers as
 * they are laid out inside the memory queue descriptor (MQD).  The
 * MQD's GPU address is handed to the CP via CP_MQD_BASE_ADDR in
 * cik_cp_compute_resume(), so this field order mirrors the hardware
 * layout - do not reorder or resize fields.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4540
/*
 * Memory queue descriptor (MQD) for Bonaire-class compute queues.
 * An instance of this struct is written into a GTT buffer object and
 * its GPU address programmed into CP_MQD_BASE_ADDR (see
 * cik_cp_compute_resume), so the layout is consumed by the CP - do
 * not reorder fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* register shadow consumed by the hardware queue descriptor */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4568
4569 /**
4570  * cik_cp_compute_resume - setup the compute queue registers
4571  *
4572  * @rdev: radeon_device pointer
4573  *
4574  * Program the compute queues and test them to make sure they
4575  * are working.
4576  * Returns 0 for success, error for failure.
4577  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	/* unhalt the MEC micro engines first */
	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	/* NOTE(review): bit 23 is an undocumented workaround bit -
	 * its meaning is not visible from this file */
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* lazily allocate the MQD buffer object on first resume */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all compute units for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* SRBM selection is global state; hold srbm_mutex while
		 * this ring's queue registers are programmed */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded) for the HQD to drain */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size is log2(ring_size / 8) dwords */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address wether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4809
/* Enable or disable both the gfx and the compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4815
/*
 * Load the gfx CP microcode, then the compute CP microcode.
 * Returns 0 on success, or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4829
/* Tear down the gfx CP, then the compute CP and its MQD buffers. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4835
/**
 * cik_cp_resume - load CP microcode and bring up the gfx/compute rings
 *
 * @rdev: radeon_device pointer
 *
 * The GUI idle interrupt is disabled while the CPs are programmed
 * and re-enabled only on full success; on an error return it is
 * left disabled.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4857
/*
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log,
 * used when diagnosing a hung GPU before a soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4897
4898 /**
4899  * cik_gpu_check_soft_reset - check which blocks are busy
4900  *
4901  * @rdev: radeon_device pointer
4902  *
4903  * Check which blocks are busy and return the relevant reset
4904  * mask to be used by cik_gpu_soft_reset().
4905  * Returns a mask of the blocks to be reset.
4906  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 - the SDMA engines report busy here as well */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4978
4979 /**
4980  * cik_gpu_soft_reset - soft reset GPU
4981  *
4982  * @rdev: radeon_device pointer
4983  * @reset_mask: mask of which blocks to reset
4984  *
4985  * Soft reset the blocks specified in @reset_mask.
4986  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump state before touching anything, for debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt only the SDMA engines that are actually being reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop MC accesses; warn (but continue) if the MC never goes idle */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset only on discrete parts */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert the GRBM reset bits, hold ~50us, then deassert;
	 * the read-back after each write posts the register update */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same assert/hold/deassert dance for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5109
/* GMCON register state saved/restored around a pci config reset on IGPs
 * (see kv_save_regs_for_reset / kv_restore_regs_for_reset).
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5115
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	/* snapshot the GMCON state so it can be restored after the reset */
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* clear the execute-on-power-up, execute-on-reg-update and
	 * stutter-enable bits while the reset is in progress */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5127
5128 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5129                                       struct kv_reset_save_regs *save)
5130 {
5131         int i;
5132
5133         WREG32(GMCON_PGFSM_WRITE, 0);
5134         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5135
5136         for (i = 0; i < 5; i++)
5137                 WREG32(GMCON_PGFSM_WRITE, 0);
5138
5139         WREG32(GMCON_PGFSM_WRITE, 0);
5140         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5141
5142         for (i = 0; i < 5; i++)
5143                 WREG32(GMCON_PGFSM_WRITE, 0);
5144
5145         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5146         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5147
5148         for (i = 0; i < 5; i++)
5149                 WREG32(GMCON_PGFSM_WRITE, 0);
5150
5151         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5152         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5153
5154         for (i = 0; i < 5; i++)
5155                 WREG32(GMCON_PGFSM_WRITE, 0);
5156
5157         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5158         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5159
5160         for (i = 0; i < 5; i++)
5161                 WREG32(GMCON_PGFSM_WRITE, 0);
5162
5163         WREG32(GMCON_PGFSM_WRITE, 0);
5164         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5165
5166         for (i = 0; i < 5; i++)
5167                 WREG32(GMCON_PGFSM_WRITE, 0);
5168
5169         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5170         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5171
5172         for (i = 0; i < 5; i++)
5173                 WREG32(GMCON_PGFSM_WRITE, 0);
5174
5175         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5176         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5177
5178         for (i = 0; i < 5; i++)
5179                 WREG32(GMCON_PGFSM_WRITE, 0);
5180
5181         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5182         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5183
5184         for (i = 0; i < 5; i++)
5185                 WREG32(GMCON_PGFSM_WRITE, 0);
5186
5187         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5188         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5189
5190         for (i = 0; i < 5; i++)
5191                 WREG32(GMCON_PGFSM_WRITE, 0);
5192
5193         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5194         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5195
5196         WREG32(GMCON_MISC3, save->gmcon_misc3);
5197         WREG32(GMCON_MISC, save->gmcon_misc);
5198         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5199 }
5200
/* Full asic reset via the pci config space; used when soft reset is not
 * enough (or a hard reset was explicitly requested by cik_asic_reset).
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* on IGPs, save GMCON state so it can be restored after the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5263
5264 /**
5265  * cik_asic_reset - soft reset GPU
5266  *
5267  * @rdev: radeon_device pointer
5268  * @hard: force hard reset
5269  *
5270  * Look up which blocks are hung and attempt
5271  * to reset them.
5272  * Returns 0 for success.
5273  */
int cik_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	/* a hard reset skips straight to the pci config method */
	if (hard) {
		cik_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* flag the engine as hung in the bios scratch regs while we recover */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* still hung: escalate to pci config reset if radeon_hard_reset is set */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* only clear the hung flag once everything reports idle */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5304
5305 /**
5306  * cik_gfx_is_lockup - check if the 3D engine is locked up
5307  *
5308  * @rdev: radeon_device pointer
5309  * @ring: radeon_ring structure holding ring information
5310  *
5311  * Check if the 3D engine is locked up (CIK).
5312  * Returns true if the engine is locked, false if not.
5313  */
5314 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5315 {
5316         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5317
5318         if (!(reset_mask & (RADEON_RESET_GFX |
5319                             RADEON_RESET_COMPUTE |
5320                             RADEON_RESET_CP))) {
5321                 radeon_ring_lockup_update(rdev, ring);
5322                 return false;
5323         }
5324         return radeon_ring_test_lockup(rdev, ring);
5325 }
5326
5327 /* MC */
5328 /**
5329  * cik_mc_program - program the GPU memory controller
5330  *
5331  * @rdev: radeon_device pointer
5332  *
5333  * Set the location of vram, gart, and AGP in the GPU's
5334  * physical address space (CIK).
5335  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop MC clients while the apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: 16-bit start/end packed low/high, in 16MB (>>24) units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture programmed empty (top == bottom) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5383
5384 /**
5385  * cik_mc_init - initialize the memory controller driver params
5386  *
5387  * @rdev: radeon_device pointer
5388  *
5389  * Look up the amount of vram, vram width, and decide how to place
5390  * vram and gart within the GPU's physical address space (CIK).
5391  * Returns 0 for success.
5392  */
5393 static int cik_mc_init(struct radeon_device *rdev)
5394 {
5395         u32 tmp;
5396         int chansize, numchan;
5397
5398         /* Get VRAM informations */
5399         rdev->mc.vram_is_ddr = true;
5400         tmp = RREG32(MC_ARB_RAMCFG);
5401         if (tmp & CHANSIZE_MASK) {
5402                 chansize = 64;
5403         } else {
5404                 chansize = 32;
5405         }
5406         tmp = RREG32(MC_SHARED_CHMAP);
5407         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5408         case 0:
5409         default:
5410                 numchan = 1;
5411                 break;
5412         case 1:
5413                 numchan = 2;
5414                 break;
5415         case 2:
5416                 numchan = 4;
5417                 break;
5418         case 3:
5419                 numchan = 8;
5420                 break;
5421         case 4:
5422                 numchan = 3;
5423                 break;
5424         case 5:
5425                 numchan = 6;
5426                 break;
5427         case 6:
5428                 numchan = 10;
5429                 break;
5430         case 7:
5431                 numchan = 12;
5432                 break;
5433         case 8:
5434                 numchan = 16;
5435                 break;
5436         }
5437         rdev->mc.vram_width = numchan * chansize;
5438         /* Could aper size report 0 ? */
5439         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5440         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5441         /* size in MB on si */
5442         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5443         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5444         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5445         si_vram_gtt_location(rdev, &rdev->mc);
5446         radeon_update_bandwidth_info(rdev);
5447
5448         return 0;
5449 }
5450
5451 /*
5452  * GART
5453  * VMID 0 is the physical GPU addresses as used by the kernel.
5454  * VMIDs 1-15 are used for userspace clients and are handled
5455  * by the radeon vm/hsa code.
5456  */
5457 /**
5458  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5459  *
5460  * @rdev: radeon_device pointer
5461  *
5462  * Flush the TLB for the VMID 0 page table (CIK).
5463  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only bit 0 (VMID 0) is set here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5472
/* Program the SH_MEM_* registers for the compute VMIDs (8-15, used by
 * amdkfd — see cik_vm_init).
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	/* 0x6000 packed into both halves of SH_MEM_BASES */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	/* srbm_mutex serializes cik_srbm_select() users */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5494
5495 /**
5496  * cik_pcie_gart_enable - gart enable
5497  *
5498  * @rdev: radeon_device pointer
5499  *
5500  * This sets up the TLBs, programs the page tables for VMID0,
5501  * sets up the hw for VMIDs 1-15 which are allocated on
5502  * demand, and sets up the global locations for the LDS, GDS,
5503  * and GPUVM for FSA64 clients (CIK).
5504  * Returns 0 for success, errors for failure.
5505  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel's VMID 0 page table covers the GTT range */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5617
5618 /**
5619  * cik_pcie_gart_disable - gart disable
5620  *
5621  * @rdev: radeon_device pointer
5622  *
5623  * This disables all VM page table (CIK).
5624  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table base addresses so
	 * cik_pcie_gart_enable() can restore them on resume */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control (note: no ENABLE_L1_TLB here) */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache (note: no ENABLE_L2_CACHE here) */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5656
5657 /**
5658  * cik_pcie_gart_fini - vm fini callback
5659  *
5660  * @rdev: radeon_device pointer
5661  *
5662  * Tears down the driver GART/VM setup (CIK).
5663  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the hw first, then release the table and GART bookkeeping */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5670
5671 /* vm parser */
5672 /**
5673  * cik_ib_parse - vm ib_parse callback
5674  *
5675  * @rdev: radeon_device pointer
5676  * @ib: indirect buffer pointer
5677  *
5678  * CIK uses hw IB checking so this is a nop (CIK).
5679  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* IB checking is done in hardware on CIK; nothing to validate here */
	return 0;
}
5684
5685 /*
5686  * vm
5687  * VMID 0 is the physical GPU addresses as used by the kernel.
5688  * VMIDs 1-15 are used for userspace clients and are handled
5689  * by the radeon vm/hsa code.
5690  */
5691 /**
5692  * cik_vm_init - cik vm init callback
5693  *
5694  * @rdev: radeon_device pointer
5695  *
5696  * Inits cik specific vm parameters (number of VMs, base of vram for
5697  * VMIDs 1-15) (CIK).
5698  * Returns 0 for success.
5699  */
5700 int cik_vm_init(struct radeon_device *rdev)
5701 {
5702         /*
5703          * number of VMs
5704          * VMID 0 is reserved for System
5705          * radeon graphics/compute will use VMIDs 1-7
5706          * amdkfd will use VMIDs 8-15
5707          */
5708         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5709         /* base offset of vram pages */
5710         if (rdev->flags & RADEON_IS_IGP) {
5711                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5712                 tmp <<= 22;
5713                 rdev->vm_manager.vram_base_offset = tmp;
5714         } else
5715                 rdev->vm_manager.vram_base_offset = 0;
5716
5717         return 0;
5718 }
5719
5720 /**
5721  * cik_vm_fini - cik vm fini callback
5722  *
5723  * @rdev: radeon_device pointer
5724  *
5725  * Tear down any asic specific VM setup (CIK).
5726  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* no asic-specific VM state to tear down on CIK */
}
5730
5731 /**
5732  * cik_vm_decode_fault - print human readable fault info
5733  *
5734  * @rdev: radeon_device pointer
5735  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5736  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5737  *
5738  * Print human readable fault information (CIK).
5739  */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* mc_client packs a four-character client name, one byte per char */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a different client-id field layout than other CIK parts */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	else
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
5759
5760 /**
5761  * cik_vm_flush - cik vm flush using the CP
5762  *
5763  * @rdev: radeon_device pointer
5764  *
5765  * Update the page table base and flush the VM TLB
5766  * using the CP (CIK).
5767  */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the gfx ring has a PFP engine; compute rings write via ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update this VMID's page directory base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs: select the VMID via SRBM_GFX_CNTL first */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM_GFX_CNTL back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5841
5842 /*
5843  * RLC
5844  * The RLC is a multi-purpose microengine that handles a
5845  * variety of functions, the most important of which is
5846  * the interrupt controller.
5847  */
5848 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5849                                           bool enable)
5850 {
5851         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5852
5853         if (enable)
5854                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5855         else
5856                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5857         WREG32(CP_INT_CNTL_RING0, tmp);
5858 }
5859
5860 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5861 {
5862         u32 tmp;
5863
5864         tmp = RREG32(RLC_LB_CNTL);
5865         if (enable)
5866                 tmp |= LOAD_BALANCE_ENABLE;
5867         else
5868                 tmp &= ~LOAD_BALANCE_ENABLE;
5869         WREG32(RLC_LB_CNTL, tmp);
5870 }
5871
/* Poll until all RLC serdes masters (per-CU and non-CU) report idle,
 * bounded by rdev->usec_timeout per register. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
        u32 i, j, k;
        u32 mask;

        /* check every SE/SH combination for busy CU masters; GRBM index
         * selection must be serialized via grbm_idx_mutex */
        mutex_lock(&rdev->grbm_idx_mutex);
        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        for (k = 0; k < rdev->usec_timeout; k++) {
                                if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                                udelay(1);
                        }
                }
        }
        /* restore broadcast mode before dropping the lock */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        mutex_unlock(&rdev->grbm_idx_mutex);

        /* then wait for the global (non-CU) masters to go idle as well */
        mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
        for (k = 0; k < rdev->usec_timeout; k++) {
                if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
                        break;
                udelay(1);
        }
}
5898
5899 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5900 {
5901         u32 tmp;
5902
5903         tmp = RREG32(RLC_CNTL);
5904         if (tmp != rlc)
5905                 WREG32(RLC_CNTL, rlc);
5906 }
5907
/* Halt the RLC ME if it is running and wait for it to go idle.
 * Returns the previous RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc(). */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
        u32 data, orig;

        orig = data = RREG32(RLC_CNTL);

        if (data & RLC_ENABLE) {
                u32 i;

                data &= ~RLC_ENABLE;
                WREG32(RLC_CNTL, data);

                /* wait for the RLC GPM to go idle */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
                                break;
                        udelay(1);
                }

                /* and for all serdes masters to drain */
                cik_wait_for_rlc_serdes(rdev);
        }

        return orig;
}
5931
/* Request RLC safe mode and wait for the handshake to complete. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
        u32 tmp, i, mask;

        /* post the ENTER_SAFE_MODE message with the request bit set */
        tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
        WREG32(RLC_GPR_REG2, tmp);

        /* wait until GFX reports both power and clock status bits set */
        mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPM_STAT) & mask) == mask)
                        break;
                udelay(1);
        }

        /* then wait for the RLC to clear REQ (request consumed) */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
                        break;
                udelay(1);
        }
}
5952
5953 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5954 {
5955         u32 tmp;
5956
5957         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5958         WREG32(RLC_GPR_REG2, tmp);
5959 }
5960
5961 /**
5962  * cik_rlc_stop - stop the RLC ME
5963  *
5964  * @rdev: radeon_device pointer
5965  *
5966  * Halt the RLC ME (MicroEngine) (CIK).
5967  */
5968 static void cik_rlc_stop(struct radeon_device *rdev)
5969 {
5970         WREG32(RLC_CNTL, 0);
5971
5972         cik_enable_gui_idle_interrupt(rdev, false);
5973
5974         cik_wait_for_rlc_serdes(rdev);
5975 }
5976
5977 /**
5978  * cik_rlc_start - start the RLC ME
5979  *
5980  * @rdev: radeon_device pointer
5981  *
5982  * Unhalt the RLC ME (MicroEngine) (CIK).
5983  */
5984 static void cik_rlc_start(struct radeon_device *rdev)
5985 {
5986         WREG32(RLC_CNTL, RLC_ENABLE);
5987
5988         cik_enable_gui_idle_interrupt(rdev, true);
5989
5990         udelay(50);
5991 }
5992
5993 /**
5994  * cik_rlc_resume - setup the RLC hw
5995  *
5996  * @rdev: radeon_device pointer
5997  *
5998  * Initialize the RLC registers, load the ucode,
5999  * and start the RLC (CIK).
6000  * Returns 0 for success, -EINVAL if the ucode is not available.
6001  */
6002 static int cik_rlc_resume(struct radeon_device *rdev)
6003 {
6004         u32 i, size, tmp;
6005
6006         if (!rdev->rlc_fw)
6007                 return -EINVAL;
6008
6009         cik_rlc_stop(rdev);
6010
6011         /* disable CG */
6012         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6013         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6014
6015         si_rlc_reset(rdev);
6016
6017         cik_init_pg(rdev);
6018
6019         cik_init_cg(rdev);
6020
6021         WREG32(RLC_LB_CNTR_INIT, 0);
6022         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6023
6024         mutex_lock(&rdev->grbm_idx_mutex);
6025         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6026         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6027         WREG32(RLC_LB_PARAMS, 0x00600408);
6028         WREG32(RLC_LB_CNTL, 0x80000004);
6029         mutex_unlock(&rdev->grbm_idx_mutex);
6030
6031         WREG32(RLC_MC_CNTL, 0);
6032         WREG32(RLC_UCODE_CNTL, 0);
6033
6034         if (rdev->new_fw) {
6035                 const struct rlc_firmware_header_v1_0 *hdr =
6036                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6037                 const __le32 *fw_data = (const __le32 *)
6038                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6039
6040                 radeon_ucode_print_rlc_hdr(&hdr->header);
6041
6042                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6043                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6044                 for (i = 0; i < size; i++)
6045                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6046                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6047         } else {
6048                 const __be32 *fw_data;
6049
6050                 switch (rdev->family) {
6051                 case CHIP_BONAIRE:
6052                 case CHIP_HAWAII:
6053                 default:
6054                         size = BONAIRE_RLC_UCODE_SIZE;
6055                         break;
6056                 case CHIP_KAVERI:
6057                         size = KV_RLC_UCODE_SIZE;
6058                         break;
6059                 case CHIP_KABINI:
6060                         size = KB_RLC_UCODE_SIZE;
6061                         break;
6062                 case CHIP_MULLINS:
6063                         size = ML_RLC_UCODE_SIZE;
6064                         break;
6065                 }
6066
6067                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6068                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6069                 for (i = 0; i < size; i++)
6070                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6071                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6072         }
6073
6074         /* XXX - find out what chips support lbpw */
6075         cik_enable_lbpw(rdev, false);
6076
6077         if (rdev->family == CHIP_BONAIRE)
6078                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6079
6080         cik_rlc_start(rdev);
6081
6082         return 0;
6083 }
6084
/* Enable/disable coarse-grain clockgating (CGCG/CGLS) for GFX.
 * The RLC must be halted while the serdes write masks are programmed;
 * cik_halt_rlc()/cik_update_rlc() bracket that window. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the serdes override to all SE/SH */
                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC state saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* four back-to-back reads, results discarded (posting
                 * reads; exact purpose not documented here) */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6122
/* Enable/disable medium-grain clockgating (MGCG) for GFX, plus the
 * related CP/RLC memory light-sleep and CGTS settings.  The serdes
 * programming happens with the RLC halted, like in cik_enable_cgcg(). */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                /* CP memory light sleep */
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* set bit 0, clear bit 1 of the MGCG override */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000001;
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the serdes override to all SE/SH */
                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        /* CGTS shader-complex gating setup */
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* force the MGCG override on (bits 0 and 1) */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* turn off RLC and CP memory light sleep */
                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);
        }
}
6206
/* MC/ATC/VM clock-gating control registers iterated by the MC
 * MGCG and LS enable helpers. */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
6219
6220 static void cik_enable_mc_ls(struct radeon_device *rdev,
6221                              bool enable)
6222 {
6223         int i;
6224         u32 orig, data;
6225
6226         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6227                 orig = data = RREG32(mc_cg_registers[i]);
6228                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6229                         data |= MC_LS_ENABLE;
6230                 else
6231                         data &= ~MC_LS_ENABLE;
6232                 if (data != orig)
6233                         WREG32(mc_cg_registers[i], data);
6234         }
6235 }
6236
6237 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6238                                bool enable)
6239 {
6240         int i;
6241         u32 orig, data;
6242
6243         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6244                 orig = data = RREG32(mc_cg_registers[i]);
6245                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6246                         data |= MC_CG_ENABLE;
6247                 else
6248                         data &= ~MC_CG_ENABLE;
6249                 if (data != orig)
6250                         WREG32(mc_cg_registers[i], data);
6251         }
6252 }
6253
/* Enable/disable medium-grain clockgating for both SDMA engines.
 * NOTE(review): 0x00000100 (enable) and the 0xff000000 override bits
 * (disable) are undocumented here - semantics per the SDMA0_CLK_CTRL
 * register spec; confirm before changing. */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
                                 bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
                WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
                WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
        } else {
                /* set the override bits on both engines, skipping the
                 * write if already set */
                orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
                data |= 0xff000000;
                if (data != orig)
                        WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

                orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
                data |= 0xff000000;
                if (data != orig)
                        WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
        }
}
6274
6275 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6276                                  bool enable)
6277 {
6278         u32 orig, data;
6279
6280         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6281                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6282                 data |= 0x100;
6283                 if (orig != data)
6284                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6285
6286                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6287                 data |= 0x100;
6288                 if (orig != data)
6289                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6290         } else {
6291                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6292                 data &= ~0x100;
6293                 if (orig != data)
6294                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6295
6296                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6297                 data &= ~0x100;
6298                 if (orig != data)
6299                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6300         }
6301 }
6302
/* Enable/disable UVD medium-grain clockgating: memory gating bits in
 * UVD_CGC_MEM_CTRL (via the UVD context bus) plus DCM in UVD_CGC_CTRL. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                /* NOTE(review): the value read back is discarded and 0xfff
                 * is written unconditionally - looks intentional, confirm */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                /* clear the low 12 memory-gating bits and DCM */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
6328
6329 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6330                                bool enable)
6331 {
6332         u32 orig, data;
6333
6334         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6335
6336         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6337                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6338                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6339         else
6340                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6341                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6342
6343         if (orig != data)
6344                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6345 }
6346
6347 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6348                                 bool enable)
6349 {
6350         u32 orig, data;
6351
6352         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6353
6354         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6355                 data &= ~CLOCK_GATING_DIS;
6356         else
6357                 data |= CLOCK_GATING_DIS;
6358
6359         if (orig != data)
6360                 WREG32(HDP_HOST_PATH_CNTL, data);
6361 }
6362
6363 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6364                               bool enable)
6365 {
6366         u32 orig, data;
6367
6368         orig = data = RREG32(HDP_MEM_POWER_LS);
6369
6370         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6371                 data |= HDP_LS_ENABLE;
6372         else
6373                 data &= ~HDP_LS_ENABLE;
6374
6375         if (orig != data)
6376                 WREG32(HDP_MEM_POWER_LS, data);
6377 }
6378
/**
 * cik_update_cg - enable/disable clockgating for the requested blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable (true) or disable (false) clockgating
 */
void cik_update_cg(struct radeon_device *rdev,
                   u32 block, bool enable)
{

        if (block & RADEON_CG_BLOCK_GFX) {
                cik_enable_gui_idle_interrupt(rdev, false);
                /* order matters! */
                if (enable) {
                        cik_enable_mgcg(rdev, true);
                        cik_enable_cgcg(rdev, true);
                } else {
                        cik_enable_cgcg(rdev, false);
                        cik_enable_mgcg(rdev, false);
                }
                cik_enable_gui_idle_interrupt(rdev, true);
        }

        if (block & RADEON_CG_BLOCK_MC) {
                /* IGPs have no dedicated MC clockgating */
                if (!(rdev->flags & RADEON_IS_IGP)) {
                        cik_enable_mc_mgcg(rdev, enable);
                        cik_enable_mc_ls(rdev, enable);
                }
        }

        if (block & RADEON_CG_BLOCK_SDMA) {
                cik_enable_sdma_mgcg(rdev, enable);
                cik_enable_sdma_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_BIF) {
                cik_enable_bif_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_UVD) {
                if (rdev->has_uvd)
                        cik_enable_uvd_mgcg(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_HDP) {
                cik_enable_hdp_mgcg(rdev, enable);
                cik_enable_hdp_ls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_VCE) {
                vce_v2_0_enable_mgcg(rdev, enable);
        }
}
6426
/* Enable clockgating at init time: GFX first, then the other blocks. */
static void cik_init_cg(struct radeon_device *rdev)
{

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

        if (rdev->has_uvd)
                si_init_uvd_internal_cg(rdev);

        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), true);
}
6441
/* Disable clockgating at teardown, in the reverse order of cik_init_cg():
 * the non-GFX blocks first, GFX last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), false);

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6452
6453 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6454                                           bool enable)
6455 {
6456         u32 data, orig;
6457
6458         orig = data = RREG32(RLC_PG_CNTL);
6459         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6460                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6461         else
6462                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6463         if (orig != data)
6464                 WREG32(RLC_PG_CNTL, data);
6465 }
6466
6467 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6468                                           bool enable)
6469 {
6470         u32 data, orig;
6471
6472         orig = data = RREG32(RLC_PG_CNTL);
6473         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6474                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6475         else
6476                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6477         if (orig != data)
6478                 WREG32(RLC_PG_CNTL, data);
6479 }
6480
6481 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6482 {
6483         u32 data, orig;
6484
6485         orig = data = RREG32(RLC_PG_CNTL);
6486         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6487                 data &= ~DISABLE_CP_PG;
6488         else
6489                 data |= DISABLE_CP_PG;
6490         if (orig != data)
6491                 WREG32(RLC_PG_CNTL, data);
6492 }
6493
6494 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6495 {
6496         u32 data, orig;
6497
6498         orig = data = RREG32(RLC_PG_CNTL);
6499         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6500                 data &= ~DISABLE_GDS_PG;
6501         else
6502                 data |= DISABLE_GDS_PG;
6503         if (orig != data)
6504                 WREG32(RLC_PG_CNTL, data);
6505 }
6506
6507 #define CP_ME_TABLE_SIZE    96
6508 #define CP_ME_TABLE_OFFSET  2048
6509 #define CP_MEC_TABLE_OFFSET 4096
6510
6511 void cik_init_cp_pg_table(struct radeon_device *rdev)
6512 {
6513         volatile u32 *dst_ptr;
6514         int me, i, max_me = 4;
6515         u32 bo_offset = 0;
6516         u32 table_offset, table_size;
6517
6518         if (rdev->family == CHIP_KAVERI)
6519                 max_me = 5;
6520
6521         if (rdev->rlc.cp_table_ptr == NULL)
6522                 return;
6523
6524         /* write the cp table buffer */
6525         dst_ptr = rdev->rlc.cp_table_ptr;
6526         for (me = 0; me < max_me; me++) {
6527                 if (rdev->new_fw) {
6528                         const __le32 *fw_data;
6529                         const struct gfx_firmware_header_v1_0 *hdr;
6530
6531                         if (me == 0) {
6532                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6533                                 fw_data = (const __le32 *)
6534                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6535                                 table_offset = le32_to_cpu(hdr->jt_offset);
6536                                 table_size = le32_to_cpu(hdr->jt_size);
6537                         } else if (me == 1) {
6538                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6539                                 fw_data = (const __le32 *)
6540                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6541                                 table_offset = le32_to_cpu(hdr->jt_offset);
6542                                 table_size = le32_to_cpu(hdr->jt_size);
6543                         } else if (me == 2) {
6544                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6545                                 fw_data = (const __le32 *)
6546                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6547                                 table_offset = le32_to_cpu(hdr->jt_offset);
6548                                 table_size = le32_to_cpu(hdr->jt_size);
6549                         } else if (me == 3) {
6550                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6551                                 fw_data = (const __le32 *)
6552                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6553                                 table_offset = le32_to_cpu(hdr->jt_offset);
6554                                 table_size = le32_to_cpu(hdr->jt_size);
6555                         } else {
6556                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6557                                 fw_data = (const __le32 *)
6558                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6559                                 table_offset = le32_to_cpu(hdr->jt_offset);
6560                                 table_size = le32_to_cpu(hdr->jt_size);
6561                         }
6562
6563                         for (i = 0; i < table_size; i ++) {
6564                                 dst_ptr[bo_offset + i] =
6565                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6566                         }
6567                         bo_offset += table_size;
6568                 } else {
6569                         const __be32 *fw_data;
6570                         table_size = CP_ME_TABLE_SIZE;
6571
6572                         if (me == 0) {
6573                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6574                                 table_offset = CP_ME_TABLE_OFFSET;
6575                         } else if (me == 1) {
6576                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6577                                 table_offset = CP_ME_TABLE_OFFSET;
6578                         } else if (me == 2) {
6579                                 fw_data = (const __be32 *)rdev->me_fw->data;
6580                                 table_offset = CP_ME_TABLE_OFFSET;
6581                         } else {
6582                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6583                                 table_offset = CP_MEC_TABLE_OFFSET;
6584                         }
6585
6586                         for (i = 0; i < table_size; i ++) {
6587                                 dst_ptr[bo_offset + i] =
6588                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6589                         }
6590                         bo_offset += table_size;
6591                 }
6592         }
6593 }
6594
/* Enable/disable GFX powergating (static PG bit plus the RLC auto-PG
 * controller). */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): value discarded - presumably a posting
                 * read to flush the disables; confirm */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6624
/**
 * cik_get_cu_active_bitmap - get the bitmap of active CUs for an SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Selects the given SE/SH via the GRBM index (under grbm_idx_mutex),
 * reads the hw and user shader array configs, then restores broadcast
 * selection.  The high 16 bits of both registers carry the inactive-CU
 * mask; the result is inverted and masked to max_cu_per_sh bits, so a
 * set bit means the CU is active.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	/* 0xffffffff selects broadcast to all SEs/SHs again */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of max_cu_per_sh low bits */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}
6649
6650 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6651 {
6652         u32 i, j, k, active_cu_number = 0;
6653         u32 mask, counter, cu_bitmap;
6654         u32 tmp = 0;
6655
6656         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6657                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6658                         mask = 1;
6659                         cu_bitmap = 0;
6660                         counter = 0;
6661                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6662                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6663                                         if (counter < 2)
6664                                                 cu_bitmap |= mask;
6665                                         counter ++;
6666                                 }
6667                                 mask <<= 1;
6668                         }
6669
6670                         active_cu_number += counter;
6671                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6672                 }
6673         }
6674
6675         WREG32(RLC_PG_AO_CU_MASK, tmp);
6676
6677         tmp = RREG32(RLC_MAX_PG_CU);
6678         tmp &= ~MAX_PU_CU_MASK;
6679         tmp |= MAX_PU_CU(active_cu_number);
6680         WREG32(RLC_MAX_PG_CU, tmp);
6681 }
6682
6683 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6684                                        bool enable)
6685 {
6686         u32 data, orig;
6687
6688         orig = data = RREG32(RLC_PG_CNTL);
6689         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6690                 data |= STATIC_PER_CU_PG_ENABLE;
6691         else
6692                 data &= ~STATIC_PER_CU_PG_ENABLE;
6693         if (orig != data)
6694                 WREG32(RLC_PG_CNTL, data);
6695 }
6696
6697 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6698                                         bool enable)
6699 {
6700         u32 data, orig;
6701
6702         orig = data = RREG32(RLC_PG_CNTL);
6703         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6704                 data |= DYN_PER_CU_PG_ENABLE;
6705         else
6706                 data &= ~DYN_PER_CU_PG_ENABLE;
6707         if (orig != data)
6708                 WREG32(RLC_PG_CNTL, data);
6709 }
6710
/* RLC GPM scratch offsets (in dwords) written via RLC_GPM_SCRATCH_ADDR/
 * RLC_GPM_SCRATCH_DATA in cik_init_gfx_cgpg(): the save/restore register
 * list and the clear state descriptor. */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6713
/**
 * cik_init_gfx_cgpg - set up gfx powergating state in the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear state descriptor (address hi/lo + size, or zeros if
 * there is no cs data) and the save/restore register list into RLC GPM
 * scratch, points the RLC at the save/restore and CP table buffers, and
 * programs the PG poll/delay/idle-interval parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the 3-dword descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers are 256-byte aligned; registers take addr >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6762
/**
 * cik_update_gfx_pg - enable/disable all gfx powergating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Toggles coarse grain, static per-CU and dynamic per-CU gfx
 * powergating together.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6769
6770 u32 cik_get_csb_size(struct radeon_device *rdev)
6771 {
6772         u32 count = 0;
6773         const struct cs_section_def *sect = NULL;
6774         const struct cs_extent_def *ext = NULL;
6775
6776         if (rdev->rlc.cs_data == NULL)
6777                 return 0;
6778
6779         /* begin clear state */
6780         count += 2;
6781         /* context control state */
6782         count += 3;
6783
6784         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6785                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6786                         if (sect->id == SECT_CONTEXT)
6787                                 count += 2 + ext->reg_count;
6788                         else
6789                                 return 0;
6790                 }
6791         }
6792         /* pa_sc_raster_config/pa_sc_raster_config1 */
6793         count += 4;
6794         /* end clear state */
6795         count += 2;
6796         /* clear state */
6797         count += 2;
6798
6799         return count;
6800 }
6801
/**
 * cik_get_csb_buffer - fill in the clear state indirect buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination for the packet stream (little-endian dwords)
 *
 * Emits the PM4 stream that loads the golden context state: preamble
 * begin, context control, one SET_CONTEXT_REG packet per extent, the
 * per-asic raster config, preamble end and CLEAR_STATE.  The dword
 * layout matches the count computed by cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg offset relative to context reg base */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-asic PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6866
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * If any PG flags are set, enables SCK slowdown on power up/down,
 * initializes gfx coarse-grain PG plus CP and GDS PG when supported,
 * programs the always-on CU mask, and finally enables gfx PG.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6881
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disables gfx PG, then CP and GDS PG when they were supported.
 * Mirrors cik_init_pg() in reverse order.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6892
6893 /*
6894  * Interrupts
6895  * Starting with r6xx, interrupts are handled via a ring buffer.
6896  * Ring buffers are areas of GPU accessible memory that the GPU
6897  * writes interrupt vectors into and the host reads vectors out of.
6898  * There is a rptr (read pointer) that determines where the
6899  * host is currently reading, and a wptr (write pointer)
6900  * which determines where the GPU has written.  When the
6901  * pointers are equal, the ring is idle.  When the GPU
6902  * writes vectors to the ring buffer, it increments the
6903  * wptr.  When there is an interrupt, the host then starts
6904  * fetching commands and processing them until the pointers are
6905  * equal again at which point it updates the rptr.
6906  */
6907
6908 /**
6909  * cik_enable_interrupts - Enable the interrupt ring buffer
6910  *
6911  * @rdev: radeon_device pointer
6912  *
6913  * Enable the interrupt ring buffer (CIK).
6914  */
6915 static void cik_enable_interrupts(struct radeon_device *rdev)
6916 {
6917         u32 ih_cntl = RREG32(IH_CNTL);
6918         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6919
6920         ih_cntl |= ENABLE_INTR;
6921         ih_rb_cntl |= IH_RB_ENABLE;
6922         WREG32(IH_CNTL, ih_cntl);
6923         WREG32(IH_RB_CNTL, ih_rb_cntl);
6924         rdev->ih.enabled = true;
6925 }
6926
6927 /**
6928  * cik_disable_interrupts - Disable the interrupt ring buffer
6929  *
6930  * @rdev: radeon_device pointer
6931  *
6932  * Disable the interrupt ring buffer (CIK).
6933  */
6934 static void cik_disable_interrupts(struct radeon_device *rdev)
6935 {
6936         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6937         u32 ih_cntl = RREG32(IH_CNTL);
6938
6939         ih_rb_cntl &= ~IH_RB_ENABLE;
6940         ih_cntl &= ~ENABLE_INTR;
6941         WREG32(IH_RB_CNTL, ih_rb_cntl);
6942         WREG32(IH_CNTL, ih_cntl);
6943         /* set rptr, wptr to 0 */
6944         WREG32(IH_RB_RPTR, 0);
6945         WREG32(IH_RB_WPTR, 0);
6946         rdev->ih.enabled = false;
6947         rdev->ih.rptr = 0;
6948 }
6949
6950 /**
6951  * cik_disable_interrupt_state - Disable all interrupt sources
6952  *
6953  * @rdev: radeon_device pointer
6954  *
6955  * Clear all interrupt enable bits used by the driver (CIK).
6956  */
6957 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6958 {
6959         u32 tmp;
6960
6961         /* gfx ring */
6962         tmp = RREG32(CP_INT_CNTL_RING0) &
6963                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6964         WREG32(CP_INT_CNTL_RING0, tmp);
6965         /* sdma */
6966         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6967         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6968         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6969         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6970         /* compute queues */
6971         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6972         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6973         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6974         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6975         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6976         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6977         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6978         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6979         /* grbm */
6980         WREG32(GRBM_INT_CNTL, 0);
6981         /* SRBM */
6982         WREG32(SRBM_INT_CNTL, 0);
6983         /* vline/vblank, etc. */
6984         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6985         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6986         if (rdev->num_crtc >= 4) {
6987                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6988                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6989         }
6990         if (rdev->num_crtc >= 6) {
6991                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6992                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6993         }
6994         /* pflip */
6995         if (rdev->num_crtc >= 2) {
6996                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6997                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6998         }
6999         if (rdev->num_crtc >= 4) {
7000                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7001                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7002         }
7003         if (rdev->num_crtc >= 6) {
7004                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7005                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7006         }
7007
7008         /* dac hotplug */
7009         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7010
7011         /* digital hotplug */
7012         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7013         WREG32(DC_HPD1_INT_CONTROL, tmp);
7014         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7015         WREG32(DC_HPD2_INT_CONTROL, tmp);
7016         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7017         WREG32(DC_HPD3_INT_CONTROL, tmp);
7018         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7019         WREG32(DC_HPD4_INT_CONTROL, tmp);
7020         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7021         WREG32(DC_HPD5_INT_CONTROL, tmp);
7022         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7023         WREG32(DC_HPD6_INT_CONTROL, tmp);
7024
7025 }
7026
7027 /**
7028  * cik_irq_init - init and enable the interrupt ring
7029  *
7030  * @rdev: radeon_device pointer
7031  *
7032  * Allocate a ring buffer for the interrupt controller,
7033  * enable the RLC, disable interrupts, enable the IH
7034  * ring buffer and enable it (CIK).
7035  * Called at device load and reume.
7036  * Returns 0 for success, errors for failure.
7037  */
7038 static int cik_irq_init(struct radeon_device *rdev)
7039 {
7040         int ret = 0;
7041         int rb_bufsz;
7042         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7043
7044         /* allocate ring */
7045         ret = r600_ih_ring_alloc(rdev);
7046         if (ret)
7047                 return ret;
7048
7049         /* disable irqs */
7050         cik_disable_interrupts(rdev);
7051
7052         /* init rlc */
7053         ret = cik_rlc_resume(rdev);
7054         if (ret) {
7055                 r600_ih_ring_fini(rdev);
7056                 return ret;
7057         }
7058
7059         /* setup interrupt control */
7060         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7061         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7062         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7063         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7064          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7065          */
7066         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7067         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7068         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7069         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7070
7071         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7072         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7073
7074         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7075                       IH_WPTR_OVERFLOW_CLEAR |
7076                       (rb_bufsz << 1));
7077
7078         if (rdev->wb.enabled)
7079                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7080
7081         /* set the writeback address whether it's enabled or not */
7082         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7083         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7084
7085         WREG32(IH_RB_CNTL, ih_rb_cntl);
7086
7087         /* set rptr, wptr to 0 */
7088         WREG32(IH_RB_RPTR, 0);
7089         WREG32(IH_RB_WPTR, 0);
7090
7091         /* Default settings for IH_CNTL (disabled at first) */
7092         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7093         /* RPTR_REARM only works if msi's are enabled */
7094         if (rdev->msi_enabled)
7095                 ih_cntl |= RPTR_REARM;
7096         WREG32(IH_CNTL, ih_cntl);
7097
7098         /* force the active interrupt state to all disabled */
7099         cik_disable_interrupt_state(rdev);
7100
7101         pci_set_master(rdev->pdev);
7102
7103         /* enable irqs */
7104         cik_enable_interrupts(rdev);
7105
7106         return ret;
7107 }
7108
7109 /**
7110  * cik_irq_set - enable/disable interrupt sources
7111  *
7112  * @rdev: radeon_device pointer
7113  *
7114  * Enable interrupt sources on the GPU (vblanks, hpd,
7115  * etc.) (CIK).
7116  * Returns 0 for success, errors for failure.
7117  */
7118 int cik_irq_set(struct radeon_device *rdev)
7119 {
7120         u32 cp_int_cntl;
7121         u32 cp_m1p0;
7122         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7123         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7124         u32 grbm_int_cntl = 0;
7125         u32 dma_cntl, dma_cntl1;
7126
7127         if (!rdev->irq.installed) {
7128                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7129                 return -EINVAL;
7130         }
7131         /* don't enable anything if the ih is disabled */
7132         if (!rdev->ih.enabled) {
7133                 cik_disable_interrupts(rdev);
7134                 /* force the active interrupt state to all disabled */
7135                 cik_disable_interrupt_state(rdev);
7136                 return 0;
7137         }
7138
7139         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7140                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7141         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7142
7143         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7144         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7145         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7146         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7147         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7148         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7149
7150         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7151         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7152
7153         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7154
7155         /* enable CP interrupts on all rings */
7156         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7157                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7158                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7159         }
7160         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7161                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7162                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7163                 if (ring->me == 1) {
7164                         switch (ring->pipe) {
7165                         case 0:
7166                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7167                                 break;
7168                         default:
7169                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7170                                 break;
7171                         }
7172                 } else {
7173                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7174                 }
7175         }
7176         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7177                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7178                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7179                 if (ring->me == 1) {
7180                         switch (ring->pipe) {
7181                         case 0:
7182                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7183                                 break;
7184                         default:
7185                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7186                                 break;
7187                         }
7188                 } else {
7189                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7190                 }
7191         }
7192
7193         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7194                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7195                 dma_cntl |= TRAP_ENABLE;
7196         }
7197
7198         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7199                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7200                 dma_cntl1 |= TRAP_ENABLE;
7201         }
7202
7203         if (rdev->irq.crtc_vblank_int[0] ||
7204             atomic_read(&rdev->irq.pflip[0])) {
7205                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7206                 crtc1 |= VBLANK_INTERRUPT_MASK;
7207         }
7208         if (rdev->irq.crtc_vblank_int[1] ||
7209             atomic_read(&rdev->irq.pflip[1])) {
7210                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7211                 crtc2 |= VBLANK_INTERRUPT_MASK;
7212         }
7213         if (rdev->irq.crtc_vblank_int[2] ||
7214             atomic_read(&rdev->irq.pflip[2])) {
7215                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7216                 crtc3 |= VBLANK_INTERRUPT_MASK;
7217         }
7218         if (rdev->irq.crtc_vblank_int[3] ||
7219             atomic_read(&rdev->irq.pflip[3])) {
7220                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7221                 crtc4 |= VBLANK_INTERRUPT_MASK;
7222         }
7223         if (rdev->irq.crtc_vblank_int[4] ||
7224             atomic_read(&rdev->irq.pflip[4])) {
7225                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7226                 crtc5 |= VBLANK_INTERRUPT_MASK;
7227         }
7228         if (rdev->irq.crtc_vblank_int[5] ||
7229             atomic_read(&rdev->irq.pflip[5])) {
7230                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7231                 crtc6 |= VBLANK_INTERRUPT_MASK;
7232         }
7233         if (rdev->irq.hpd[0]) {
7234                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7235                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7236         }
7237         if (rdev->irq.hpd[1]) {
7238                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7239                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7240         }
7241         if (rdev->irq.hpd[2]) {
7242                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7243                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7244         }
7245         if (rdev->irq.hpd[3]) {
7246                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7247                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7248         }
7249         if (rdev->irq.hpd[4]) {
7250                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7251                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7252         }
7253         if (rdev->irq.hpd[5]) {
7254                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7255                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7256         }
7257
7258         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7259
7260         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7261         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7262
7263         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7264
7265         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7266
7267         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7268         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7269         if (rdev->num_crtc >= 4) {
7270                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7271                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7272         }
7273         if (rdev->num_crtc >= 6) {
7274                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7275                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7276         }
7277
7278         if (rdev->num_crtc >= 2) {
7279                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7280                        GRPH_PFLIP_INT_MASK);
7281                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7282                        GRPH_PFLIP_INT_MASK);
7283         }
7284         if (rdev->num_crtc >= 4) {
7285                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7286                        GRPH_PFLIP_INT_MASK);
7287                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7288                        GRPH_PFLIP_INT_MASK);
7289         }
7290         if (rdev->num_crtc >= 6) {
7291                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7292                        GRPH_PFLIP_INT_MASK);
7293                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7294                        GRPH_PFLIP_INT_MASK);
7295         }
7296
7297         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7298         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7299         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7300         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7301         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7302         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7303
7304         /* posting read */
7305         RREG32(SRBM_STATUS);
7306
7307         return 0;
7308 }
7309
7310 /**
7311  * cik_irq_ack - ack interrupt sources
7312  *
7313  * @rdev: radeon_device pointer
7314  *
7315  * Ack interrupt sources on the GPU (vblanks, hpd,
7316  * etc.) (CIK).  Certain interrupts sources are sw
7317  * generated and do not require an explicit ack.
7318  */
7319 static inline void cik_irq_ack(struct radeon_device *rdev)
7320 {
7321         u32 tmp;
7322
7323         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7324         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7325         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7326         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7327         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7328         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7329         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7330
7331         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7332                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7333         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7334                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7335         if (rdev->num_crtc >= 4) {
7336                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7337                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7338                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7339                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7340         }
7341         if (rdev->num_crtc >= 6) {
7342                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7343                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7344                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7345                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7346         }
7347
7348         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7349                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7350                        GRPH_PFLIP_INT_CLEAR);
7351         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7352                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7353                        GRPH_PFLIP_INT_CLEAR);
7354         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7355                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7356         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7357                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7358         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7359                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7360         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7361                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7362
7363         if (rdev->num_crtc >= 4) {
7364                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7365                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7366                                GRPH_PFLIP_INT_CLEAR);
7367                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7368                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7369                                GRPH_PFLIP_INT_CLEAR);
7370                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7371                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7372                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7373                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7374                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7375                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7376                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7377                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7378         }
7379
7380         if (rdev->num_crtc >= 6) {
7381                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7382                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7383                                GRPH_PFLIP_INT_CLEAR);
7384                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7385                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7386                                GRPH_PFLIP_INT_CLEAR);
7387                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7388                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7389                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7390                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7391                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7392                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7393                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7394                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7395         }
7396
7397         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7398                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7399                 tmp |= DC_HPDx_INT_ACK;
7400                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7401         }
7402         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7403                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7404                 tmp |= DC_HPDx_INT_ACK;
7405                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7406         }
7407         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7408                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7409                 tmp |= DC_HPDx_INT_ACK;
7410                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7411         }
7412         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7413                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7414                 tmp |= DC_HPDx_INT_ACK;
7415                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7416         }
7417         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7418                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7419                 tmp |= DC_HPDx_INT_ACK;
7420                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7421         }
7422         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7423                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7424                 tmp |= DC_HPDx_INT_ACK;
7425                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7426         }
7427         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7428                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7429                 tmp |= DC_HPDx_RX_INT_ACK;
7430                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7431         }
7432         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7433                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7434                 tmp |= DC_HPDx_RX_INT_ACK;
7435                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7436         }
7437         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7438                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7439                 tmp |= DC_HPDx_RX_INT_ACK;
7440                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7441         }
7442         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7443                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7444                 tmp |= DC_HPDx_RX_INT_ACK;
7445                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7446         }
7447         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7448                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7449                 tmp |= DC_HPDx_RX_INT_ACK;
7450                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7451         }
7452         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7453                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7454                 tmp |= DC_HPDx_RX_INT_ACK;
7455                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7456         }
7457 }
7458
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).  The sequence matters:
 * sources are shut off first, then any interrupts that raced in
 * are acknowledged, and finally the per-block interrupt state is
 * cleared so nothing re-asserts.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait for in-flight interrupts to land, then acknowledge them */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7474
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7488
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).  Interrupts must be fully quiesced
 * (via cik_irq_suspend) before the ring memory is freed.
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7503
7504 /**
7505  * cik_get_ih_wptr - get the IH ring buffer wptr
7506  *
7507  * @rdev: radeon_device pointer
7508  *
7509  * Get the IH ring buffer wptr from either the register
7510  * or the writeback memory buffer (CIK).  Also check for
7511  * ring buffer overflow and deal with it.
7512  * Used by cik_irq_process().
7513  * Returns the value of the wptr.
7514  */
7515 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7516 {
7517         u32 wptr, tmp;
7518
7519         if (rdev->wb.enabled)
7520                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7521         else
7522                 wptr = RREG32(IH_RB_WPTR);
7523
7524         if (wptr & RB_OVERFLOW) {
7525                 wptr &= ~RB_OVERFLOW;
7526                 /* When a ring buffer overflow happen start parsing interrupt
7527                  * from the last not overwritten vector (wptr + 16). Hopefully
7528                  * this should allow us to catchup.
7529                  */
7530                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7531                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7532                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7533                 tmp = RREG32(IH_RB_CNTL);
7534                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7535                 WREG32(IH_RB_CNTL, tmp);
7536         }
7537         return (wptr & rdev->ih.ptr_mask);
7538 }
7539
7540 /*        CIK IV Ring
7541  * Each IV ring entry is 128 bits:
7542  * [7:0]    - interrupt source id
7543  * [31:8]   - reserved
7544  * [59:32]  - interrupt source data
7545  * [63:60]  - reserved
7546  * [71:64]  - RINGID
7547  *            CP:
7548  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7549  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7550  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7551  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7552  *            PIPE_ID - ME0 0=3D
7553  *                    - ME1&2 compute dispatcher (4 pipes each)
7554  *            SDMA:
7555  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7556  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7557  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7558  * [79:72]  - VMID
7559  * [95:80]  - PASID
7560  * [127:96] - reserved
7561  */
7562 /**
7563  * cik_irq_process - interrupt handler
7564  *
7565  * @rdev: radeon_device pointer
7566  *
 * Interrupt handler (CIK).  Walk the IH ring,
7568  * ack interrupts and schedule work to handle
7569  * interrupt events.
7570  * Returns irq process return code.
7571  */
7572 int cik_irq_process(struct radeon_device *rdev)
7573 {
7574         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7575         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7576         u32 wptr;
7577         u32 rptr;
7578         u32 src_id, src_data, ring_id;
7579         u8 me_id, pipe_id, queue_id;
7580         u32 ring_index;
7581         bool queue_hotplug = false;
7582         bool queue_dp = false;
7583         bool queue_reset = false;
7584         u32 addr, status, mc_client;
7585         bool queue_thermal = false;
7586
7587         if (!rdev->ih.enabled || rdev->shutdown)
7588                 return IRQ_NONE;
7589
7590         wptr = cik_get_ih_wptr(rdev);
7591
7592 restart_ih:
7593         /* is somebody else already processing irqs? */
7594         if (atomic_xchg(&rdev->ih.lock, 1))
7595                 return IRQ_NONE;
7596
7597         rptr = rdev->ih.rptr;
7598         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7599
7600         /* Order reading of wptr vs. reading of IH ring data */
7601         rmb();
7602
7603         /* display interrupts */
7604         cik_irq_ack(rdev);
7605
7606         while (rptr != wptr) {
7607                 /* wptr/rptr are in bytes! */
7608                 ring_index = rptr / 4;
7609
7610                 radeon_kfd_interrupt(rdev,
7611                                 (const void *) &rdev->ih.ring[ring_index]);
7612
7613                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7614                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7615                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7616
7617                 switch (src_id) {
7618                 case 1: /* D1 vblank/vline */
7619                         switch (src_data) {
7620                         case 0: /* D1 vblank */
7621                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7622                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7623
7624                                 if (rdev->irq.crtc_vblank_int[0]) {
7625                                         drm_handle_vblank(rdev->ddev, 0);
7626                                         rdev->pm.vblank_sync = true;
7627                                         wake_up(&rdev->irq.vblank_queue);
7628                                 }
7629                                 if (atomic_read(&rdev->irq.pflip[0]))
7630                                         radeon_crtc_handle_vblank(rdev, 0);
7631                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7632                                 DRM_DEBUG("IH: D1 vblank\n");
7633
7634                                 break;
7635                         case 1: /* D1 vline */
7636                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7637                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7638
7639                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7640                                 DRM_DEBUG("IH: D1 vline\n");
7641
7642                                 break;
7643                         default:
7644                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7645                                 break;
7646                         }
7647                         break;
7648                 case 2: /* D2 vblank/vline */
7649                         switch (src_data) {
7650                         case 0: /* D2 vblank */
7651                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7652                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7653
7654                                 if (rdev->irq.crtc_vblank_int[1]) {
7655                                         drm_handle_vblank(rdev->ddev, 1);
7656                                         rdev->pm.vblank_sync = true;
7657                                         wake_up(&rdev->irq.vblank_queue);
7658                                 }
7659                                 if (atomic_read(&rdev->irq.pflip[1]))
7660                                         radeon_crtc_handle_vblank(rdev, 1);
7661                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7662                                 DRM_DEBUG("IH: D2 vblank\n");
7663
7664                                 break;
7665                         case 1: /* D2 vline */
7666                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7667                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7668
7669                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7670                                 DRM_DEBUG("IH: D2 vline\n");
7671
7672                                 break;
7673                         default:
7674                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7675                                 break;
7676                         }
7677                         break;
7678                 case 3: /* D3 vblank/vline */
7679                         switch (src_data) {
7680                         case 0: /* D3 vblank */
7681                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7682                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7683
7684                                 if (rdev->irq.crtc_vblank_int[2]) {
7685                                         drm_handle_vblank(rdev->ddev, 2);
7686                                         rdev->pm.vblank_sync = true;
7687                                         wake_up(&rdev->irq.vblank_queue);
7688                                 }
7689                                 if (atomic_read(&rdev->irq.pflip[2]))
7690                                         radeon_crtc_handle_vblank(rdev, 2);
7691                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7692                                 DRM_DEBUG("IH: D3 vblank\n");
7693
7694                                 break;
7695                         case 1: /* D3 vline */
7696                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7697                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7698
7699                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7700                                 DRM_DEBUG("IH: D3 vline\n");
7701
7702                                 break;
7703                         default:
7704                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7705                                 break;
7706                         }
7707                         break;
7708                 case 4: /* D4 vblank/vline */
7709                         switch (src_data) {
7710                         case 0: /* D4 vblank */
7711                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7712                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7713
7714                                 if (rdev->irq.crtc_vblank_int[3]) {
7715                                         drm_handle_vblank(rdev->ddev, 3);
7716                                         rdev->pm.vblank_sync = true;
7717                                         wake_up(&rdev->irq.vblank_queue);
7718                                 }
7719                                 if (atomic_read(&rdev->irq.pflip[3]))
7720                                         radeon_crtc_handle_vblank(rdev, 3);
7721                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7722                                 DRM_DEBUG("IH: D4 vblank\n");
7723
7724                                 break;
7725                         case 1: /* D4 vline */
7726                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7727                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7728
7729                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7730                                 DRM_DEBUG("IH: D4 vline\n");
7731
7732                                 break;
7733                         default:
7734                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7735                                 break;
7736                         }
7737                         break;
7738                 case 5: /* D5 vblank/vline */
7739                         switch (src_data) {
7740                         case 0: /* D5 vblank */
7741                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7742                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7743
7744                                 if (rdev->irq.crtc_vblank_int[4]) {
7745                                         drm_handle_vblank(rdev->ddev, 4);
7746                                         rdev->pm.vblank_sync = true;
7747                                         wake_up(&rdev->irq.vblank_queue);
7748                                 }
7749                                 if (atomic_read(&rdev->irq.pflip[4]))
7750                                         radeon_crtc_handle_vblank(rdev, 4);
7751                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7752                                 DRM_DEBUG("IH: D5 vblank\n");
7753
7754                                 break;
7755                         case 1: /* D5 vline */
7756                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7757                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7758
7759                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7760                                 DRM_DEBUG("IH: D5 vline\n");
7761
7762                                 break;
7763                         default:
7764                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7765                                 break;
7766                         }
7767                         break;
7768                 case 6: /* D6 vblank/vline */
7769                         switch (src_data) {
7770                         case 0: /* D6 vblank */
7771                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7772                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7773
7774                                 if (rdev->irq.crtc_vblank_int[5]) {
7775                                         drm_handle_vblank(rdev->ddev, 5);
7776                                         rdev->pm.vblank_sync = true;
7777                                         wake_up(&rdev->irq.vblank_queue);
7778                                 }
7779                                 if (atomic_read(&rdev->irq.pflip[5]))
7780                                         radeon_crtc_handle_vblank(rdev, 5);
7781                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7782                                 DRM_DEBUG("IH: D6 vblank\n");
7783
7784                                 break;
7785                         case 1: /* D6 vline */
7786                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7787                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7788
7789                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7790                                 DRM_DEBUG("IH: D6 vline\n");
7791
7792                                 break;
7793                         default:
7794                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7795                                 break;
7796                         }
7797                         break;
7798                 case 8: /* D1 page flip */
7799                 case 10: /* D2 page flip */
7800                 case 12: /* D3 page flip */
7801                 case 14: /* D4 page flip */
7802                 case 16: /* D5 page flip */
7803                 case 18: /* D6 page flip */
7804                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7805                         if (radeon_use_pflipirq > 0)
7806                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7807                         break;
7808                 case 42: /* HPD hotplug */
7809                         switch (src_data) {
7810                         case 0:
7811                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7812                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7813
7814                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7815                                 queue_hotplug = true;
7816                                 DRM_DEBUG("IH: HPD1\n");
7817
7818                                 break;
7819                         case 1:
7820                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7821                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7822
7823                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7824                                 queue_hotplug = true;
7825                                 DRM_DEBUG("IH: HPD2\n");
7826
7827                                 break;
7828                         case 2:
7829                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7830                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7831
7832                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7833                                 queue_hotplug = true;
7834                                 DRM_DEBUG("IH: HPD3\n");
7835
7836                                 break;
7837                         case 3:
7838                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7839                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7840
7841                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7842                                 queue_hotplug = true;
7843                                 DRM_DEBUG("IH: HPD4\n");
7844
7845                                 break;
7846                         case 4:
7847                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7848                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7849
7850                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7851                                 queue_hotplug = true;
7852                                 DRM_DEBUG("IH: HPD5\n");
7853
7854                                 break;
7855                         case 5:
7856                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7857                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7858
7859                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7860                                 queue_hotplug = true;
7861                                 DRM_DEBUG("IH: HPD6\n");
7862
7863                                 break;
7864                         case 6:
7865                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7866                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7867
7868                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7869                                 queue_dp = true;
7870                                 DRM_DEBUG("IH: HPD_RX 1\n");
7871
7872                                 break;
7873                         case 7:
7874                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7875                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7876
7877                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7878                                 queue_dp = true;
7879                                 DRM_DEBUG("IH: HPD_RX 2\n");
7880
7881                                 break;
7882                         case 8:
7883                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7884                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7885
7886                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7887                                 queue_dp = true;
7888                                 DRM_DEBUG("IH: HPD_RX 3\n");
7889
7890                                 break;
7891                         case 9:
7892                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7893                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7894
7895                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7896                                 queue_dp = true;
7897                                 DRM_DEBUG("IH: HPD_RX 4\n");
7898
7899                                 break;
7900                         case 10:
7901                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7902                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7903
7904                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7905                                 queue_dp = true;
7906                                 DRM_DEBUG("IH: HPD_RX 5\n");
7907
7908                                 break;
7909                         case 11:
7910                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7911                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7912
7913                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7914                                 queue_dp = true;
7915                                 DRM_DEBUG("IH: HPD_RX 6\n");
7916
7917                                 break;
7918                         default:
7919                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7920                                 break;
7921                         }
7922                         break;
7923                 case 96:
7924                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7925                         WREG32(SRBM_INT_ACK, 0x1);
7926                         break;
7927                 case 124: /* UVD */
7928                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7929                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7930                         break;
7931                 case 146:
7932                 case 147:
7933                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7934                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7935                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7936                         /* reset addr and status */
7937                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7938                         if (addr == 0x0 && status == 0x0)
7939                                 break;
7940                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7941                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7942                                 addr);
7943                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7944                                 status);
7945                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7946                         break;
7947                 case 167: /* VCE */
7948                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7949                         switch (src_data) {
7950                         case 0:
7951                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7952                                 break;
7953                         case 1:
7954                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7955                                 break;
7956                         default:
7957                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7958                                 break;
7959                         }
7960                         break;
7961                 case 176: /* GFX RB CP_INT */
7962                 case 177: /* GFX IB CP_INT */
7963                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7964                         break;
7965                 case 181: /* CP EOP event */
7966                         DRM_DEBUG("IH: CP EOP\n");
7967                         /* XXX check the bitfield order! */
7968                         me_id = (ring_id & 0x60) >> 5;
7969                         pipe_id = (ring_id & 0x18) >> 3;
7970                         queue_id = (ring_id & 0x7) >> 0;
7971                         switch (me_id) {
7972                         case 0:
7973                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7974                                 break;
7975                         case 1:
7976                         case 2:
7977                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7978                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7979                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7980                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7981                                 break;
7982                         }
7983                         break;
7984                 case 184: /* CP Privileged reg access */
7985                         DRM_ERROR("Illegal register access in command stream\n");
7986                         /* XXX check the bitfield order! */
7987                         me_id = (ring_id & 0x60) >> 5;
7988                         pipe_id = (ring_id & 0x18) >> 3;
7989                         queue_id = (ring_id & 0x7) >> 0;
7990                         switch (me_id) {
7991                         case 0:
7992                                 /* This results in a full GPU reset, but all we need to do is soft
7993                                  * reset the CP for gfx
7994                                  */
7995                                 queue_reset = true;
7996                                 break;
7997                         case 1:
7998                                 /* XXX compute */
7999                                 queue_reset = true;
8000                                 break;
8001                         case 2:
8002                                 /* XXX compute */
8003                                 queue_reset = true;
8004                                 break;
8005                         }
8006                         break;
8007                 case 185: /* CP Privileged inst */
8008                         DRM_ERROR("Illegal instruction in command stream\n");
8009                         /* XXX check the bitfield order! */
8010                         me_id = (ring_id & 0x60) >> 5;
8011                         pipe_id = (ring_id & 0x18) >> 3;
8012                         queue_id = (ring_id & 0x7) >> 0;
8013                         switch (me_id) {
8014                         case 0:
8015                                 /* This results in a full GPU reset, but all we need to do is soft
8016                                  * reset the CP for gfx
8017                                  */
8018                                 queue_reset = true;
8019                                 break;
8020                         case 1:
8021                                 /* XXX compute */
8022                                 queue_reset = true;
8023                                 break;
8024                         case 2:
8025                                 /* XXX compute */
8026                                 queue_reset = true;
8027                                 break;
8028                         }
8029                         break;
8030                 case 224: /* SDMA trap event */
8031                         /* XXX check the bitfield order! */
8032                         me_id = (ring_id & 0x3) >> 0;
8033                         queue_id = (ring_id & 0xc) >> 2;
8034                         DRM_DEBUG("IH: SDMA trap\n");
8035                         switch (me_id) {
8036                         case 0:
8037                                 switch (queue_id) {
8038                                 case 0:
8039                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8040                                         break;
8041                                 case 1:
8042                                         /* XXX compute */
8043                                         break;
8044                                 case 2:
8045                                         /* XXX compute */
8046                                         break;
8047                                 }
8048                                 break;
8049                         case 1:
8050                                 switch (queue_id) {
8051                                 case 0:
8052                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8053                                         break;
8054                                 case 1:
8055                                         /* XXX compute */
8056                                         break;
8057                                 case 2:
8058                                         /* XXX compute */
8059                                         break;
8060                                 }
8061                                 break;
8062                         }
8063                         break;
8064                 case 230: /* thermal low to high */
8065                         DRM_DEBUG("IH: thermal low to high\n");
8066                         rdev->pm.dpm.thermal.high_to_low = false;
8067                         queue_thermal = true;
8068                         break;
8069                 case 231: /* thermal high to low */
8070                         DRM_DEBUG("IH: thermal high to low\n");
8071                         rdev->pm.dpm.thermal.high_to_low = true;
8072                         queue_thermal = true;
8073                         break;
8074                 case 233: /* GUI IDLE */
8075                         DRM_DEBUG("IH: GUI idle\n");
8076                         break;
8077                 case 241: /* SDMA Privileged inst */
8078                 case 247: /* SDMA Privileged inst */
8079                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8080                         /* XXX check the bitfield order! */
8081                         me_id = (ring_id & 0x3) >> 0;
8082                         queue_id = (ring_id & 0xc) >> 2;
8083                         switch (me_id) {
8084                         case 0:
8085                                 switch (queue_id) {
8086                                 case 0:
8087                                         queue_reset = true;
8088                                         break;
8089                                 case 1:
8090                                         /* XXX compute */
8091                                         queue_reset = true;
8092                                         break;
8093                                 case 2:
8094                                         /* XXX compute */
8095                                         queue_reset = true;
8096                                         break;
8097                                 }
8098                                 break;
8099                         case 1:
8100                                 switch (queue_id) {
8101                                 case 0:
8102                                         queue_reset = true;
8103                                         break;
8104                                 case 1:
8105                                         /* XXX compute */
8106                                         queue_reset = true;
8107                                         break;
8108                                 case 2:
8109                                         /* XXX compute */
8110                                         queue_reset = true;
8111                                         break;
8112                                 }
8113                                 break;
8114                         }
8115                         break;
8116                 default:
8117                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8118                         break;
8119                 }
8120
8121                 /* wptr/rptr are in bytes! */
8122                 rptr += 16;
8123                 rptr &= rdev->ih.ptr_mask;
8124                 WREG32(IH_RB_RPTR, rptr);
8125         }
8126         if (queue_dp)
8127                 schedule_work(&rdev->dp_work);
8128         if (queue_hotplug)
8129                 schedule_delayed_work(&rdev->hotplug_work, 0);
8130         if (queue_reset) {
8131                 rdev->needs_reset = true;
8132                 wake_up_all(&rdev->fence_queue);
8133         }
8134         if (queue_thermal)
8135                 schedule_work(&rdev->pm.dpm.thermal.work);
8136         rdev->ih.rptr = rptr;
8137         atomic_set(&rdev->ih.lock, 0);
8138
8139         /* make sure wptr hasn't changed while processing */
8140         wptr = cik_get_ih_wptr(rdev);
8141         if (wptr != rptr)
8142                 goto restart_ih;
8143
8144         return IRQ_HANDLED;
8145 }
8146
8147 /*
8148  * startup/shutdown callbacks
8149  */
8150 static void cik_uvd_init(struct radeon_device *rdev)
8151 {
8152         int r;
8153
8154         if (!rdev->has_uvd)
8155                 return;
8156
8157         r = radeon_uvd_init(rdev);
8158         if (r) {
8159                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8160                 /*
8161                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8162                  * to early fails cik_uvd_start() and thus nothing happens
8163                  * there. So it is pointless to try to go through that code
8164                  * hence why we disable uvd here.
8165                  */
8166                 rdev->has_uvd = 0;
8167                 return;
8168         }
8169         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8170         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8171 }
8172
8173 static void cik_uvd_start(struct radeon_device *rdev)
8174 {
8175         int r;
8176
8177         if (!rdev->has_uvd)
8178                 return;
8179
8180         r = radeon_uvd_resume(rdev);
8181         if (r) {
8182                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8183                 goto error;
8184         }
8185         r = uvd_v4_2_resume(rdev);
8186         if (r) {
8187                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8188                 goto error;
8189         }
8190         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8191         if (r) {
8192                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8193                 goto error;
8194         }
8195         return;
8196
8197 error:
8198         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8199 }
8200
8201 static void cik_uvd_resume(struct radeon_device *rdev)
8202 {
8203         struct radeon_ring *ring;
8204         int r;
8205
8206         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8207                 return;
8208
8209         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8210         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8211         if (r) {
8212                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8213                 return;
8214         }
8215         r = uvd_v1_0_init(rdev);
8216         if (r) {
8217                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8218                 return;
8219         }
8220 }
8221
8222 static void cik_vce_init(struct radeon_device *rdev)
8223 {
8224         int r;
8225
8226         if (!rdev->has_vce)
8227                 return;
8228
8229         r = radeon_vce_init(rdev);
8230         if (r) {
8231                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8232                 /*
8233                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8234                  * to early fails cik_vce_start() and thus nothing happens
8235                  * there. So it is pointless to try to go through that code
8236                  * hence why we disable vce here.
8237                  */
8238                 rdev->has_vce = 0;
8239                 return;
8240         }
8241         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8242         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8243         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8244         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8245 }
8246
8247 static void cik_vce_start(struct radeon_device *rdev)
8248 {
8249         int r;
8250
8251         if (!rdev->has_vce)
8252                 return;
8253
8254         r = radeon_vce_resume(rdev);
8255         if (r) {
8256                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8257                 goto error;
8258         }
8259         r = vce_v2_0_resume(rdev);
8260         if (r) {
8261                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8262                 goto error;
8263         }
8264         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8265         if (r) {
8266                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8267                 goto error;
8268         }
8269         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8270         if (r) {
8271                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8272                 goto error;
8273         }
8274         return;
8275
8276 error:
8277         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8278         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8279 }
8280
8281 static void cik_vce_resume(struct radeon_device *rdev)
8282 {
8283         struct radeon_ring *ring;
8284         int r;
8285
8286         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8287                 return;
8288
8289         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8290         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8291         if (r) {
8292                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8293                 return;
8294         }
8295         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8296         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8297         if (r) {
8298                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8299                 return;
8300         }
8301         r = vce_v1_0_init(rdev);
8302         if (r) {
8303                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8304                 return;
8305         }
8306 }
8307
8308 /**
8309  * cik_startup - program the asic to a functional state
8310  *
8311  * @rdev: radeon_device pointer
8312  *
8313  * Programs the asic to a functional state (CIK).
8314  * Called by cik_init() and cik_resume().
8315  * Returns 0 for success, error for failure.
8316  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPUs load the MC ucode here unless DPM already took care of it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* APUs need a save/restore register list for clock/power gating */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* fence rings must be started before the CP/SDMA engines resume */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE bring-up is best effort: on failure they zero their ring
	 * sizes so the matching *_resume() calls below become no-ops.
	 */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with old firmware pads with type-2 packets; everything
	 * else uses type-3 NOPs.
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* no-ops when the corresponding *_start() above failed */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8513
8514 /**
8515  * cik_resume - resume the asic to a functional state
8516  *
8517  * @rdev: radeon_device pointer
8518  *
8519  * Programs the asic to a functional state (CIK).
8520  * Called at resume.
8521  * Returns 0 for success, error for failure.
8522  */
8523 int cik_resume(struct radeon_device *rdev)
8524 {
8525         int r;
8526
8527         /* post card */
8528         atom_asic_init(rdev->mode_info.atom_context);
8529
8530         /* init golden registers */
8531         cik_init_golden_registers(rdev);
8532
8533         if (rdev->pm.pm_method == PM_METHOD_DPM)
8534                 radeon_pm_resume(rdev);
8535
8536         rdev->accel_working = true;
8537         r = cik_startup(rdev);
8538         if (r) {
8539                 DRM_ERROR("cik startup failed on resume\n");
8540                 rdev->accel_working = false;
8541                 return r;
8542         }
8543
8544         return r;
8545
8546 }
8547
8548 /**
8549  * cik_suspend - suspend the asic
8550  *
8551  * @rdev: radeon_device pointer
8552  *
8553  * Bring the chip into a state suitable for suspend (CIK).
8554  * Called at suspend.
8555  * Returns 0 for success.
8556  */
int cik_suspend(struct radeon_device *rdev)
{
	/* quiesce KFD before the engines it relies on are stopped */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* only tear down UVD/VCE if they were initialized */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* disable power/clock gating before interrupts and GART go away */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8578
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic specific functions. This should
 * also allow us to remove a bunch of callback functions
 * like vram_info.
 */
8585 /**
8586  * cik_init - asic specific driver and hw init
8587  *
8588  * @rdev: radeon_device pointer
8589  *
8590  * Setup asic specific driver variables and program the hw
8591  * to a functional state (CIK).
8592  * Called at driver startup.
8593  * Returns 0 for success, errors for failure.
8594  */
8595 int cik_init(struct radeon_device *rdev)
8596 {
8597         struct radeon_ring *ring;
8598         int r;
8599
8600         /* Read BIOS */
8601         if (!radeon_get_bios(rdev)) {
8602                 if (ASIC_IS_AVIVO(rdev))
8603                         return -EINVAL;
8604         }
8605         /* Must be an ATOMBIOS */
8606         if (!rdev->is_atom_bios) {
8607                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8608                 return -EINVAL;
8609         }
8610         r = radeon_atombios_init(rdev);
8611         if (r)
8612                 return r;
8613
8614         /* Post card if necessary */
8615         if (!radeon_card_posted(rdev)) {
8616                 if (!rdev->bios) {
8617                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8618                         return -EINVAL;
8619                 }
8620                 DRM_INFO("GPU not posted. posting now...\n");
8621                 atom_asic_init(rdev->mode_info.atom_context);
8622         }
8623         /* init golden registers */
8624         cik_init_golden_registers(rdev);
8625         /* Initialize scratch registers */
8626         cik_scratch_init(rdev);
8627         /* Initialize surface registers */
8628         radeon_surface_init(rdev);
8629         /* Initialize clocks */
8630         radeon_get_clock_info(rdev->ddev);
8631
8632         /* Fence driver */
8633         r = radeon_fence_driver_init(rdev);
8634         if (r)
8635                 return r;
8636
8637         /* initialize memory controller */
8638         r = cik_mc_init(rdev);
8639         if (r)
8640                 return r;
8641         /* Memory manager */
8642         r = radeon_bo_init(rdev);
8643         if (r)
8644                 return r;
8645
8646         if (rdev->flags & RADEON_IS_IGP) {
8647                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8648                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8649                         r = cik_init_microcode(rdev);
8650                         if (r) {
8651                                 DRM_ERROR("Failed to load firmware!\n");
8652                                 return r;
8653                         }
8654                 }
8655         } else {
8656                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8657                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8658                     !rdev->mc_fw) {
8659                         r = cik_init_microcode(rdev);
8660                         if (r) {
8661                                 DRM_ERROR("Failed to load firmware!\n");
8662                                 return r;
8663                         }
8664                 }
8665         }
8666
8667         /* Initialize power management */
8668         radeon_pm_init(rdev);
8669
8670         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8671         ring->ring_obj = NULL;
8672         r600_ring_init(rdev, ring, 1024 * 1024);
8673
8674         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8675         ring->ring_obj = NULL;
8676         r600_ring_init(rdev, ring, 1024 * 1024);
8677         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8678         if (r)
8679                 return r;
8680
8681         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8682         ring->ring_obj = NULL;
8683         r600_ring_init(rdev, ring, 1024 * 1024);
8684         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8685         if (r)
8686                 return r;
8687
8688         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8689         ring->ring_obj = NULL;
8690         r600_ring_init(rdev, ring, 256 * 1024);
8691
8692         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8693         ring->ring_obj = NULL;
8694         r600_ring_init(rdev, ring, 256 * 1024);
8695
8696         cik_uvd_init(rdev);
8697         cik_vce_init(rdev);
8698
8699         rdev->ih.ring_obj = NULL;
8700         r600_ih_ring_init(rdev, 64 * 1024);
8701
8702         r = r600_pcie_gart_init(rdev);
8703         if (r)
8704                 return r;
8705
8706         rdev->accel_working = true;
8707         r = cik_startup(rdev);
8708         if (r) {
8709                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8710                 cik_cp_fini(rdev);
8711                 cik_sdma_fini(rdev);
8712                 cik_irq_fini(rdev);
8713                 sumo_rlc_fini(rdev);
8714                 cik_mec_fini(rdev);
8715                 radeon_wb_fini(rdev);
8716                 radeon_ib_pool_fini(rdev);
8717                 radeon_vm_manager_fini(rdev);
8718                 radeon_irq_kms_fini(rdev);
8719                 cik_pcie_gart_fini(rdev);
8720                 rdev->accel_working = false;
8721         }
8722
8723         /* Don't start up if the MC ucode is missing.
8724          * The default clocks and voltages before the MC ucode
8725          * is loaded are not suffient for advanced operations.
8726          */
8727         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8728                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8729                 return -EINVAL;
8730         }
8731
8732         return 0;
8733 }
8734
8735 /**
8736  * cik_fini - asic specific driver and hw fini
8737  *
8738  * @rdev: radeon_device pointer
8739  *
8740  * Tear down the asic specific driver variables and program the hw
8741  * to an idle state (CIK).
8742  * Called at driver unload.
8743  */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop and free the command processors and SDMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD/VCE teardown; these handle the not-initialized case themselves */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* BIOS copy was kmalloc'ed in radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8770
/**
 * dce8_program_fmt - set up the FMT block on a display controller
 *
 * @encoder: encoder whose crtc's FMT block should be programmed
 *
 * Program truncation or spatial dithering in the FMT block so the pipe's
 * output depth matches the bpc reported for the attached monitor (CIK).
 * LVDS/eDP panels (FMT handled by atom) and analog DACs are skipped.
 */
void dce8_program_fmt(struct drm_encoder *encoder)
{
	struct drm_device *dev = encoder->dev;
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
	int bpc = 0;
	u32 tmp = 0;
	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;

	if (connector) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		bpc = radeon_get_monitor_bpc(connector);
		dither = radeon_connector->dither;
	}

	/* LVDS/eDP FMT is set up by atom */
	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
		return;

	/* not needed for analog */
	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
		return;

	/* bpc unknown (no connector/EDID info); leave FMT disabled (tmp == 0) */
	if (bpc == 0)
		return;

	switch (bpc) {
	case 6:
		if (dither == RADEON_FMT_DITHER_ENABLE)
			/* XXX sort out optimal dither settings */
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
		else
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
		break;
	case 8:
		if (dither == RADEON_FMT_DITHER_ENABLE)
			/* XXX sort out optimal dither settings */
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
				FMT_RGB_RANDOM_ENABLE |
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
		else
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
		break;
	case 10:
		if (dither == RADEON_FMT_DITHER_ENABLE)
			/* XXX sort out optimal dither settings */
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
				FMT_RGB_RANDOM_ENABLE |
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
		else
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
		break;
	default:
		/* not needed */
		break;
	}

	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
}
8834
8835 /* display watermark setup */
8836 /**
8837  * dce8_line_buffer_adjust - Set up the line buffer
8838  *
8839  * @rdev: radeon_device pointer
8840  * @radeon_crtc: the selected display controller
8841  * @mode: the current display mode on the selected display
8842  * controller
8843  *
8844  * Setup up the line buffer allocation for
8845  * the selected display controller (CIK).
8846  * Returns the line buffer size in pixels.
8847  */
8848 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8849                                    struct radeon_crtc *radeon_crtc,
8850                                    struct drm_display_mode *mode)
8851 {
8852         u32 tmp, buffer_alloc, i;
8853         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8854         /*
8855          * Line Buffer Setup
8856          * There are 6 line buffers, one for each display controllers.
8857          * There are 3 partitions per LB. Select the number of partitions
8858          * to enable based on the display width.  For display widths larger
8859          * than 4096, you need use to use 2 display controllers and combine
8860          * them using the stereo blender.
8861          */
8862         if (radeon_crtc->base.enabled && mode) {
8863                 if (mode->crtc_hdisplay < 1920) {
8864                         tmp = 1;
8865                         buffer_alloc = 2;
8866                 } else if (mode->crtc_hdisplay < 2560) {
8867                         tmp = 2;
8868                         buffer_alloc = 2;
8869                 } else if (mode->crtc_hdisplay < 4096) {
8870                         tmp = 0;
8871                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8872                 } else {
8873                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8874                         tmp = 0;
8875                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8876                 }
8877         } else {
8878                 tmp = 1;
8879                 buffer_alloc = 0;
8880         }
8881
8882         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8883                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8884
8885         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8886                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8887         for (i = 0; i < rdev->usec_timeout; i++) {
8888                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8889                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8890                         break;
8891                 udelay(1);
8892         }
8893
8894         if (radeon_crtc->base.enabled && mode) {
8895                 switch (tmp) {
8896                 case 0:
8897                 default:
8898                         return 4096 * 2;
8899                 case 1:
8900                         return 1920 * 2;
8901                 case 2:
8902                         return 2560 * 2;
8903                 }
8904         }
8905
8906         /* controller not enabled, so no lb used */
8907         return 0;
8908 }
8909
8910 /**
8911  * cik_get_number_of_dram_channels - get the number of dram channels
8912  *
8913  * @rdev: radeon_device pointer
8914  *
8915  * Look up the number of video ram channels (CIK).
8916  * Used for display watermark bandwidth calculations
8917  * Returns the number of dram channels
8918  */
8919 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8920 {
8921         u32 tmp = RREG32(MC_SHARED_CHMAP);
8922
8923         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8924         case 0:
8925         default:
8926                 return 1;
8927         case 1:
8928                 return 2;
8929         case 2:
8930                 return 4;
8931         case 3:
8932                 return 8;
8933         case 4:
8934                 return 3;
8935         case 5:
8936                 return 6;
8937         case 6:
8938                 return 10;
8939         case 7:
8940                 return 12;
8941         case 8:
8942                 return 16;
8943         }
8944 }
8945
/* watermark calculation parameters for one display head (CIK/DCE8) */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8961
8962 /**
8963  * dce8_dram_bandwidth - get the dram bandwidth
8964  *
8965  * @wm: watermark calculation data
8966  *
8967  * Calculate the raw dram bandwidth (CIK).
8968  * Used for display watermark bandwidth calculations
8969  * Returns the dram bandwidth in MBytes/s
8970  */
8971 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8972 {
8973         /* Calculate raw DRAM Bandwidth */
8974         fixed20_12 dram_efficiency; /* 0.7 */
8975         fixed20_12 yclk, dram_channels, bandwidth;
8976         fixed20_12 a;
8977
8978         a.full = dfixed_const(1000);
8979         yclk.full = dfixed_const(wm->yclk);
8980         yclk.full = dfixed_div(yclk, a);
8981         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8982         a.full = dfixed_const(10);
8983         dram_efficiency.full = dfixed_const(7);
8984         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8985         bandwidth.full = dfixed_mul(dram_channels, yclk);
8986         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8987
8988         return dfixed_trunc(bandwidth);
8989 }
8990
8991 /**
8992  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8993  *
8994  * @wm: watermark calculation data
8995  *
8996  * Calculate the dram bandwidth used for display (CIK).
8997  * Used for display watermark bandwidth calculations
8998  * Returns the dram bandwidth for display in MBytes/s
8999  */
9000 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9001 {
9002         /* Calculate DRAM Bandwidth and the part allocated to display. */
9003         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9004         fixed20_12 yclk, dram_channels, bandwidth;
9005         fixed20_12 a;
9006
9007         a.full = dfixed_const(1000);
9008         yclk.full = dfixed_const(wm->yclk);
9009         yclk.full = dfixed_div(yclk, a);
9010         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9011         a.full = dfixed_const(10);
9012         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9013         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9014         bandwidth.full = dfixed_mul(dram_channels, yclk);
9015         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9016
9017         return dfixed_trunc(bandwidth);
9018 }
9019
9020 /**
9021  * dce8_data_return_bandwidth - get the data return bandwidth
9022  *
9023  * @wm: watermark calculation data
9024  *
9025  * Calculate the data return bandwidth used for display (CIK).
9026  * Used for display watermark bandwidth calculations
9027  * Returns the data return bandwidth in MBytes/s
9028  */
9029 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9030 {
9031         /* Calculate the display Data return Bandwidth */
9032         fixed20_12 return_efficiency; /* 0.8 */
9033         fixed20_12 sclk, bandwidth;
9034         fixed20_12 a;
9035
9036         a.full = dfixed_const(1000);
9037         sclk.full = dfixed_const(wm->sclk);
9038         sclk.full = dfixed_div(sclk, a);
9039         a.full = dfixed_const(10);
9040         return_efficiency.full = dfixed_const(8);
9041         return_efficiency.full = dfixed_div(return_efficiency, a);
9042         a.full = dfixed_const(32);
9043         bandwidth.full = dfixed_mul(a, sclk);
9044         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9045
9046         return dfixed_trunc(bandwidth);
9047 }
9048
9049 /**
9050  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9051  *
9052  * @wm: watermark calculation data
9053  *
9054  * Calculate the dmif bandwidth used for display (CIK).
9055  * Used for display watermark bandwidth calculations
9056  * Returns the dmif bandwidth in MBytes/s
9057  */
9058 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9059 {
9060         /* Calculate the DMIF Request Bandwidth */
9061         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9062         fixed20_12 disp_clk, bandwidth;
9063         fixed20_12 a, b;
9064
9065         a.full = dfixed_const(1000);
9066         disp_clk.full = dfixed_const(wm->disp_clk);
9067         disp_clk.full = dfixed_div(disp_clk, a);
9068         a.full = dfixed_const(32);
9069         b.full = dfixed_mul(a, disp_clk);
9070
9071         a.full = dfixed_const(10);
9072         disp_clk_request_efficiency.full = dfixed_const(8);
9073         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9074
9075         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9076
9077         return dfixed_trunc(bandwidth);
9078 }
9079
9080 /**
9081  * dce8_available_bandwidth - get the min available bandwidth
9082  *
9083  * @wm: watermark calculation data
9084  *
9085  * Calculate the min available bandwidth used for display (CIK).
9086  * Used for display watermark bandwidth calculations
9087  * Returns the min available bandwidth in MBytes/s
9088  */
9089 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9090 {
9091         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9092         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9093         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9094         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9095
9096         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9097 }
9098
9099 /**
9100  * dce8_average_bandwidth - get the average available bandwidth
9101  *
9102  * @wm: watermark calculation data
9103  *
9104  * Calculate the average available bandwidth used for display (CIK).
9105  * Used for display watermark bandwidth calculations
9106  * Returns the average available bandwidth in MBytes/s
9107  */
9108 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9109 {
9110         /* Calculate the display mode Average Bandwidth
9111          * DisplayMode should contain the source and destination dimensions,
9112          * timing, etc.
9113          */
9114         fixed20_12 bpp;
9115         fixed20_12 line_time;
9116         fixed20_12 src_width;
9117         fixed20_12 bandwidth;
9118         fixed20_12 a;
9119
9120         a.full = dfixed_const(1000);
9121         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9122         line_time.full = dfixed_div(line_time, a);
9123         bpp.full = dfixed_const(wm->bytes_per_pixel);
9124         src_width.full = dfixed_const(wm->src_width);
9125         bandwidth.full = dfixed_mul(src_width, bpp);
9126         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9127         bandwidth.full = dfixed_div(bandwidth, line_time);
9128
9129         return dfixed_trunc(bandwidth);
9130 }
9131
9132 /**
9133  * dce8_latency_watermark - get the latency watermark
9134  *
9135  * @wm: watermark calculation data
9136  *
9137  * Calculate the latency watermark (CIK).
9138  * Used for display watermark bandwidth calculations
9139  * Returns the latency watermark in ns
9140  */
9141 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9142 {
9143         /* First calculate the latency in ns */
9144         u32 mc_latency = 2000; /* 2000 ns. */
9145         u32 available_bandwidth = dce8_available_bandwidth(wm);
9146         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9147         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9148         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9149         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9150                 (wm->num_heads * cursor_line_pair_return_time);
9151         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9152         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9153         u32 tmp, dmif_size = 12288;
9154         fixed20_12 a, b, c;
9155
9156         if (wm->num_heads == 0)
9157                 return 0;
9158
9159         a.full = dfixed_const(2);
9160         b.full = dfixed_const(1);
9161         if ((wm->vsc.full > a.full) ||
9162             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9163             (wm->vtaps >= 5) ||
9164             ((wm->vsc.full >= a.full) && wm->interlaced))
9165                 max_src_lines_per_dst_line = 4;
9166         else
9167                 max_src_lines_per_dst_line = 2;
9168
9169         a.full = dfixed_const(available_bandwidth);
9170         b.full = dfixed_const(wm->num_heads);
9171         a.full = dfixed_div(a, b);
9172
9173         b.full = dfixed_const(mc_latency + 512);
9174         c.full = dfixed_const(wm->disp_clk);
9175         b.full = dfixed_div(b, c);
9176
9177         c.full = dfixed_const(dmif_size);
9178         b.full = dfixed_div(c, b);
9179
9180         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9181
9182         b.full = dfixed_const(1000);
9183         c.full = dfixed_const(wm->disp_clk);
9184         b.full = dfixed_div(c, b);
9185         c.full = dfixed_const(wm->bytes_per_pixel);
9186         b.full = dfixed_mul(b, c);
9187
9188         lb_fill_bw = min(tmp, dfixed_trunc(b));
9189
9190         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9191         b.full = dfixed_const(1000);
9192         c.full = dfixed_const(lb_fill_bw);
9193         b.full = dfixed_div(c, b);
9194         a.full = dfixed_div(a, b);
9195         line_fill_time = dfixed_trunc(a);
9196
9197         if (line_fill_time < wm->active_time)
9198                 return latency;
9199         else
9200                 return latency + (line_fill_time - wm->active_time);
9201
9202 }
9203
9204 /**
9205  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9206  * average and available dram bandwidth
9207  *
9208  * @wm: watermark calculation data
9209  *
9210  * Check if the display average bandwidth fits in the display
9211  * dram bandwidth (CIK).
9212  * Used for display watermark bandwidth calculations
9213  * Returns true if the display fits, false if not.
9214  */
9215 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9216 {
9217         if (dce8_average_bandwidth(wm) <=
9218             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9219                 return true;
9220         else
9221                 return false;
9222 }
9223
9224 /**
9225  * dce8_average_bandwidth_vs_available_bandwidth - check
9226  * average and available bandwidth
9227  *
9228  * @wm: watermark calculation data
9229  *
9230  * Check if the display average bandwidth fits in the display
9231  * available bandwidth (CIK).
9232  * Used for display watermark bandwidth calculations
9233  * Returns true if the display fits, false if not.
9234  */
9235 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9236 {
9237         if (dce8_average_bandwidth(wm) <=
9238             (dce8_available_bandwidth(wm) / wm->num_heads))
9239                 return true;
9240         else
9241                 return false;
9242 }
9243
9244 /**
9245  * dce8_check_latency_hiding - check latency hiding
9246  *
9247  * @wm: watermark calculation data
9248  *
9249  * Check latency hiding (CIK).
9250  * Used for display watermark bandwidth calculations
9251  * Returns true if the display fits, false if not.
9252  */
9253 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9254 {
9255         u32 lb_partitions = wm->lb_size / wm->src_width;
9256         u32 line_time = wm->active_time + wm->blank_time;
9257         u32 latency_tolerant_lines;
9258         u32 latency_hiding;
9259         fixed20_12 a;
9260
9261         a.full = dfixed_const(1);
9262         if (wm->vsc.full > a.full)
9263                 latency_tolerant_lines = 1;
9264         else {
9265                 if (lb_partitions <= (wm->vtaps + 1))
9266                         latency_tolerant_lines = 1;
9267                 else
9268                         latency_tolerant_lines = 2;
9269         }
9270
9271         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9272
9273         if (dce8_latency_watermark(wm) <= latency_hiding)
9274                 return true;
9275         else
9276                 return false;
9277 }
9278
9279 /**
9280  * dce8_program_watermarks - program display watermarks
9281  *
9282  * @rdev: radeon_device pointer
9283  * @radeon_crtc: the selected display controller
9284  * @lb_size: line buffer size
9285  * @num_heads: number of display controllers in use
9286  *
9287  * Calculate and program the display watermarks for the
9288  * selected display controller (CIK).
9289  */
9290 static void dce8_program_watermarks(struct radeon_device *rdev,
9291                                     struct radeon_crtc *radeon_crtc,
9292                                     u32 lb_size, u32 num_heads)
9293 {
9294         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9295         struct dce8_wm_params wm_low, wm_high;
9296         u32 pixel_period;
9297         u32 line_time = 0;
9298         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9299         u32 tmp, wm_mask;
9300
9301         if (radeon_crtc->base.enabled && num_heads && mode) {
9302                 pixel_period = 1000000 / (u32)mode->clock;
9303                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9304
9305                 /* watermark for high clocks */
9306                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9307                     rdev->pm.dpm_enabled) {
9308                         wm_high.yclk =
9309                                 radeon_dpm_get_mclk(rdev, false) * 10;
9310                         wm_high.sclk =
9311                                 radeon_dpm_get_sclk(rdev, false) * 10;
9312                 } else {
9313                         wm_high.yclk = rdev->pm.current_mclk * 10;
9314                         wm_high.sclk = rdev->pm.current_sclk * 10;
9315                 }
9316
9317                 wm_high.disp_clk = mode->clock;
9318                 wm_high.src_width = mode->crtc_hdisplay;
9319                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9320                 wm_high.blank_time = line_time - wm_high.active_time;
9321                 wm_high.interlaced = false;
9322                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9323                         wm_high.interlaced = true;
9324                 wm_high.vsc = radeon_crtc->vsc;
9325                 wm_high.vtaps = 1;
9326                 if (radeon_crtc->rmx_type != RMX_OFF)
9327                         wm_high.vtaps = 2;
9328                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9329                 wm_high.lb_size = lb_size;
9330                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9331                 wm_high.num_heads = num_heads;
9332
9333                 /* set for high clocks */
9334                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9335
9336                 /* possibly force display priority to high */
9337                 /* should really do this at mode validation time... */
9338                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9339                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9340                     !dce8_check_latency_hiding(&wm_high) ||
9341                     (rdev->disp_priority == 2)) {
9342                         DRM_DEBUG_KMS("force priority to high\n");
9343                 }
9344
9345                 /* watermark for low clocks */
9346                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9347                     rdev->pm.dpm_enabled) {
9348                         wm_low.yclk =
9349                                 radeon_dpm_get_mclk(rdev, true) * 10;
9350                         wm_low.sclk =
9351                                 radeon_dpm_get_sclk(rdev, true) * 10;
9352                 } else {
9353                         wm_low.yclk = rdev->pm.current_mclk * 10;
9354                         wm_low.sclk = rdev->pm.current_sclk * 10;
9355                 }
9356
9357                 wm_low.disp_clk = mode->clock;
9358                 wm_low.src_width = mode->crtc_hdisplay;
9359                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9360                 wm_low.blank_time = line_time - wm_low.active_time;
9361                 wm_low.interlaced = false;
9362                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9363                         wm_low.interlaced = true;
9364                 wm_low.vsc = radeon_crtc->vsc;
9365                 wm_low.vtaps = 1;
9366                 if (radeon_crtc->rmx_type != RMX_OFF)
9367                         wm_low.vtaps = 2;
9368                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9369                 wm_low.lb_size = lb_size;
9370                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9371                 wm_low.num_heads = num_heads;
9372
9373                 /* set for low clocks */
9374                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9375
9376                 /* possibly force display priority to high */
9377                 /* should really do this at mode validation time... */
9378                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9379                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9380                     !dce8_check_latency_hiding(&wm_low) ||
9381                     (rdev->disp_priority == 2)) {
9382                         DRM_DEBUG_KMS("force priority to high\n");
9383                 }
9384
9385                 /* Save number of lines the linebuffer leads before the scanout */
9386                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9387         }
9388
9389         /* select wm A */
9390         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9391         tmp = wm_mask;
9392         tmp &= ~LATENCY_WATERMARK_MASK(3);
9393         tmp |= LATENCY_WATERMARK_MASK(1);
9394         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9395         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9396                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9397                 LATENCY_HIGH_WATERMARK(line_time)));
9398         /* select wm B */
9399         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9400         tmp &= ~LATENCY_WATERMARK_MASK(3);
9401         tmp |= LATENCY_WATERMARK_MASK(2);
9402         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9403         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9404                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9405                 LATENCY_HIGH_WATERMARK(line_time)));
9406         /* restore original selection */
9407         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9408
9409         /* save values for DPM */
9410         radeon_crtc->line_time = line_time;
9411         radeon_crtc->wm_high = latency_watermark_a;
9412         radeon_crtc->wm_low = latency_watermark_b;
9413 }
9414
9415 /**
9416  * dce8_bandwidth_update - program display watermarks
9417  *
9418  * @rdev: radeon_device pointer
9419  *
9420  * Calculate and program the display watermarks and line
9421  * buffer allocation (CIK).
9422  */
9423 void dce8_bandwidth_update(struct radeon_device *rdev)
9424 {
9425         struct drm_display_mode *mode = NULL;
9426         u32 num_heads = 0, lb_size;
9427         int i;
9428
9429         if (!rdev->mode_info.mode_config_initialized)
9430                 return;
9431
9432         radeon_update_display_priority(rdev);
9433
9434         for (i = 0; i < rdev->num_crtc; i++) {
9435                 if (rdev->mode_info.crtcs[i]->base.enabled)
9436                         num_heads++;
9437         }
9438         for (i = 0; i < rdev->num_crtc; i++) {
9439                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9440                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9441                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9442         }
9443 }
9444
9445 /**
9446  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9447  *
9448  * @rdev: radeon_device pointer
9449  *
9450  * Fetches a GPU clock counter snapshot (SI).
9451  * Returns the 64 bit clock counter snapshot.
9452  */
9453 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9454 {
9455         uint64_t clock;
9456
9457         mutex_lock(&rdev->gpu_clock_mutex);
9458         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9459         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9460                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9461         mutex_unlock(&rdev->gpu_clock_mutex);
9462         return clock;
9463 }
9464
9465 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9466                              u32 cntl_reg, u32 status_reg)
9467 {
9468         int r, i;
9469         struct atom_clock_dividers dividers;
9470         uint32_t tmp;
9471
9472         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9473                                            clock, false, &dividers);
9474         if (r)
9475                 return r;
9476
9477         tmp = RREG32_SMC(cntl_reg);
9478         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9479         tmp |= dividers.post_divider;
9480         WREG32_SMC(cntl_reg, tmp);
9481
9482         for (i = 0; i < 100; i++) {
9483                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9484                         break;
9485                 mdelay(10);
9486         }
9487         if (i == 100)
9488                 return -ETIMEDOUT;
9489
9490         return 0;
9491 }
9492
9493 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9494 {
9495         int r = 0;
9496
9497         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9498         if (r)
9499                 return r;
9500
9501         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9502         return r;
9503 }
9504
9505 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9506 {
9507         int r, i;
9508         struct atom_clock_dividers dividers;
9509         u32 tmp;
9510
9511         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9512                                            ecclk, false, &dividers);
9513         if (r)
9514                 return r;
9515
9516         for (i = 0; i < 100; i++) {
9517                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9518                         break;
9519                 mdelay(10);
9520         }
9521         if (i == 100)
9522                 return -ETIMEDOUT;
9523
9524         tmp = RREG32_SMC(CG_ECLK_CNTL);
9525         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9526         tmp |= dividers.post_divider;
9527         WREG32_SMC(CG_ECLK_CNTL, tmp);
9528
9529         for (i = 0; i < 100; i++) {
9530                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9531                         break;
9532                 mdelay(10);
9533         }
9534         if (i == 100)
9535                 return -ETIMEDOUT;
9536
9537         return 0;
9538 }
9539
/**
 * cik_pcie_gen3_enable - raise the PCIe link speed to gen2/gen3 if possible
 * @rdev: radeon_device pointer
 *
 * Queries the platform's supported link speeds and, when a higher rate
 * than the current one is available, runs the gen3 equalization retrain
 * sequence (for gen3 targets) and then programs the new target link
 * speed into the GPU's Link Control 2 register and the LC speed-control
 * MMIO register.  Silently returns without touching anything for
 * root-bus devices, IGPs, non-PCIE parts, when the platform caps are
 * unreadable, or when disabled via the radeon.pcie_gen2=0 module option.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* no upstream bridge to negotiate with */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* user disabled faster link speeds via module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* mask of link speeds the platform (root port) supports */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current data rate field: 1 = gen2, 2 = gen3 (per the checks below) */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* raw PCIe capability offsets; both ends must expose the capability */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* snapshot LNKCTL on both ends so HAWD can be restored later */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* disable hardware autonomous width changes during retraining */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back up to the detected maximum first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization attempts */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* re-snapshot LNKCTL/LNKCTL2 before this attempt */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then ask the LC to redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				/* give the link time to retrain */
				mdelay(100);

				/* linkctl: restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: carry the saved bits 4 and 11:9 back in.
				 * NOTE(review): these look like the LNKCTL2 enter-compliance
				 * and compliance/margin fields — confirm against the PCIe
				 * spec before touching this mask.
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (LNKCTL2 bits 3:0) on the GPU */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the software-initiated speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait (bounded) for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9699
/**
 * cik_program_aspm - configure PCIe ASPM for CIK parts
 * @rdev: radeon_device pointer
 *
 * Programs the link controller for Active State Power Management:
 * L0s/L1 inactivity timers, PLL power-down while in L1, dynamic lane
 * power states, and — when the upstream root port advertises clock
 * power management — the CLKREQ#-related clock source selections in
 * the SMC.  Skipped for IGPs, non-PCIE parts, or when disabled with
 * the radeon.aspm=0 module option.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* fixed policy for CIK: allow L0s, L1, PLL-off-in-L1 and CLKREQ */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* user disabled ASPM via module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the advertised N_FTS value (0x24 fast training sequences) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration; only written below
	 * once it is known whether L1 is enabled */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its inactivity timer and allow PMI-to-L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow both PIF PLL pairs to power down in OFF/TXS2 states */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# handling only if the root port advertises
			 * clock power management in its link capabilities */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* allow power-down in L1/L23 and move the misc
				 * clock sources off the reference clock.
				 * NOTE(review): the SMC clock selections below
				 * (THM/MISC/MPLL bypass) presumably reroute
				 * clocks so the refclk can stop with CLKREQ#;
				 * confirm against the CIK register docs. */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: still write back the L0s/PMI configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS is saturated and both directions report reversal,
		 * drop the L0s inactivity timer back to 0 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}