drm/radeon: add audio support for DCE6/8 GPUs (v12)
[linux-2.6-block.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
/*
 * Firmware images requested at runtime for the CIK-family ASICs this
 * file drives.  MODULE_FIRMWARE() records the names so userspace tools
 * (e.g. initramfs generators) know to bundle them with the module.
 */
/* Bonaire (discrete DCE8) */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
/* Kaveri (APU) -- no MC/SMC images: integrated parts have no dedicated VRAM controller firmware */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
/* Kabini (APU) */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56
/* Helpers shared with other ASIC files (r600/evergreen/sumo/si/cik_sdma). */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
/* Forward declarations for static helpers defined later in this file. */
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
80
81 /* get temperature in millidegrees */
82 int ci_get_temp(struct radeon_device *rdev)
83 {
84         u32 temp;
85         int actual_temp = 0;
86
87         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
88                 CTF_TEMP_SHIFT;
89
90         if (temp & 0x200)
91                 actual_temp = 255;
92         else
93                 actual_temp = temp & 0x1ff;
94
95         actual_temp = actual_temp * 1000;
96
97         return actual_temp;
98 }
99
100 /* get temperature in millidegrees */
101 int kv_get_temp(struct radeon_device *rdev)
102 {
103         u32 temp;
104         int actual_temp = 0;
105
106         temp = RREG32_SMC(0xC0300E0C);
107
108         if (temp)
109                 actual_temp = (temp / 8) - 49;
110         else
111                 actual_temp = 0;
112
113         actual_temp = actual_temp * 1000;
114
115         return actual_temp;
116 }
117
118 /*
119  * Indirect registers accessor
120  */
/*
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon device handle
 * @reg:  indirect register offset to read
 *
 * Returns the 32-bit value of @reg, accessed through the
 * PCIE_INDEX/PCIE_DATA indirect window.
 *
 * NOTE(review): nothing here serializes the index write against the
 * data read -- concurrent callers could interleave and read the wrong
 * register.  Confirm all callers are serialized (later radeon code
 * guards this pair with a dedicated spinlock).
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	return r;
}
130
/*
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon device handle
 * @reg:  indirect register offset to write
 * @v:    value to write
 *
 * Writes @v to @reg through the PCIE_INDEX/PCIE_DATA indirect window.
 *
 * NOTE(review): same serialization caveat as cik_pciep_rreg() -- the
 * index/data sequence is not protected by a lock here; confirm callers
 * cannot race.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* read back to post the data write */
}
138
/*
 * RLC save/restore register list for Spectre (Kaveri) GPUs.
 *
 * The data is laid out as pairs of
 *   (flags << 16) | (register byte offset >> 2), default/restore value
 * with bare count words (0x3, 0x5 below) apparently introducing
 * sub-lists; the final sub-list carries offsets only, no values.
 * NOTE(review): the exact meaning of the high-16-bit flags and the
 * count words is inferred from the data layout -- confirm against the
 * RLC microcode interface documentation before editing entries.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* NOTE(review): bare count word -- presumably starts a sub-list */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* NOTE(review): bare count word -- final sub-list has offsets only */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
585
/*
 * RLC save/restore register list for Kalindi (Kabini) GPUs.
 * Same layout as spectre_rlc_save_restore_register_list above:
 *   (flags << 16) | (register byte offset >> 2), default/restore value
 * pairs, with bare count words (0x3, 0x5) between sub-lists.
 * NOTE(review): format inferred from the data layout -- confirm against
 * the RLC microcode interface documentation before editing entries.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* NOTE(review): bare count word -- presumably starts a sub-list */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* NOTE(review): bare count word -- final sub-list has offsets only */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
910
/*
 * "Golden" SPM register settings for Bonaire.
 * Triples of { register offset, mask of bits to change, value }.
 * NOTE(review): presumably consumed by radeon_program_register_sequence()
 * at init -- confirm with the caller, which is outside this chunk.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
915
/*
 * "Golden" common register settings for Bonaire.
 * Triples of { register offset, mask of bits to change, value }.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
923
/* Bonaire ASIC-specific golden register fixups ({offset, mask, value}
 * triples), applied by cik_init_golden_registers().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
968
/* Bonaire medium/coarse-grain clock gating init values ({offset, mask,
 * value} triples), applied first by cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1054
/* Kaveri ("Spectre") SPM golden settings ({offset, mask, value} triples),
 * applied by cik_init_golden_registers().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1059
/* Kaveri ("Spectre") common golden settings ({offset, mask, value}
 * triples), applied by cik_init_golden_registers().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1067
1068 static const u32 spectre_golden_registers[] =
1069 {
1070         0x3c000, 0xffff1fff, 0x96940200,
1071         0x3c00c, 0xffff0001, 0xff000000,
1072         0x3c200, 0xfffc0fff, 0x00000100,
1073         0x6ed8, 0x00010101, 0x00010000,
1074         0x9834, 0xf00fffff, 0x00000400,
1075         0x9838, 0xfffffffc, 0x00020200,
1076         0x5bb0, 0x000000f0, 0x00000070,
1077         0x5bc0, 0xf0311fff, 0x80300000,
1078         0x98f8, 0x73773777, 0x12010001,
1079         0x9b7c, 0x00ff0000, 0x00fc0000,
1080         0x2f48, 0x73773777, 0x12010001,
1081         0x8a14, 0xf000003f, 0x00000007,
1082         0x8b24, 0xffffffff, 0x00ffffff,
1083         0x28350, 0x3f3f3fff, 0x00000082,
1084         0x28355, 0x0000003f, 0x00000000,
1085         0x3e78, 0x00000001, 0x00000002,
1086         0x913c, 0xffff03df, 0x00000004,
1087         0xc768, 0x00000008, 0x00000008,
1088         0x8c00, 0x000008ff, 0x00000800,
1089         0x9508, 0x00010000, 0x00010000,
1090         0xac0c, 0xffffffff, 0x54763210,
1091         0x214f8, 0x01ff01ff, 0x00000002,
1092         0x21498, 0x007ff800, 0x00200000,
1093         0x2015c, 0xffffffff, 0x00000f40,
1094         0x30934, 0xffffffff, 0x00000001
1095 };
1096
/* Kaveri ("Spectre") medium/coarse-grain clock gating init values
 * ({offset, mask, value} triples), applied first by
 * cik_init_golden_registers().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1187
/* Kabini ("Kalindi") SPM golden settings ({offset, mask, value} triples),
 * applied by cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1192
/* Kabini ("Kalindi") common golden settings ({offset, mask, value}
 * triples), applied by cik_init_golden_registers().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1200
/* Kabini ("Kalindi") ASIC-specific golden register fixups ({offset, mask,
 * value} triples), applied by cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1234
/* Kabini ("Kalindi") medium/coarse-grain clock gating init values
 * ({offset, mask, value} triples), applied first by
 * cik_init_golden_registers().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1293
1294 static void cik_init_golden_registers(struct radeon_device *rdev)
1295 {
1296         switch (rdev->family) {
1297         case CHIP_BONAIRE:
1298                 radeon_program_register_sequence(rdev,
1299                                                  bonaire_mgcg_cgcg_init,
1300                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1301                 radeon_program_register_sequence(rdev,
1302                                                  bonaire_golden_registers,
1303                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1304                 radeon_program_register_sequence(rdev,
1305                                                  bonaire_golden_common_registers,
1306                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1307                 radeon_program_register_sequence(rdev,
1308                                                  bonaire_golden_spm_registers,
1309                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1310                 break;
1311         case CHIP_KABINI:
1312                 radeon_program_register_sequence(rdev,
1313                                                  kalindi_mgcg_cgcg_init,
1314                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1315                 radeon_program_register_sequence(rdev,
1316                                                  kalindi_golden_registers,
1317                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1318                 radeon_program_register_sequence(rdev,
1319                                                  kalindi_golden_common_registers,
1320                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1321                 radeon_program_register_sequence(rdev,
1322                                                  kalindi_golden_spm_registers,
1323                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1324                 break;
1325         case CHIP_KAVERI:
1326                 radeon_program_register_sequence(rdev,
1327                                                  spectre_mgcg_cgcg_init,
1328                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1329                 radeon_program_register_sequence(rdev,
1330                                                  spectre_golden_registers,
1331                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1332                 radeon_program_register_sequence(rdev,
1333                                                  spectre_golden_common_registers,
1334                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1335                 radeon_program_register_sequence(rdev,
1336                                                  spectre_golden_spm_registers,
1337                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1338                 break;
1339         default:
1340                 break;
1341         }
1342 }
1343
1344 /**
1345  * cik_get_xclk - get the xclk
1346  *
1347  * @rdev: radeon_device pointer
1348  *
1349  * Returns the reference clock used by the gfx engine
1350  * (CIK).
1351  */
1352 u32 cik_get_xclk(struct radeon_device *rdev)
1353 {
1354         u32 reference_clock = rdev->clock.spll.reference_freq;
1355
1356         if (rdev->flags & RADEON_IS_IGP) {
1357                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1358                         return reference_clock / 2;
1359         } else {
1360                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1361                         return reference_clock / 4;
1362         }
1363         return reference_clock;
1364 }
1365
1366 /**
1367  * cik_mm_rdoorbell - read a doorbell dword
1368  *
1369  * @rdev: radeon_device pointer
1370  * @offset: byte offset into the aperture
1371  *
1372  * Returns the value in the doorbell aperture at the
1373  * requested offset (CIK).
1374  */
1375 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1376 {
1377         if (offset < rdev->doorbell.size) {
1378                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1379         } else {
1380                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1381                 return 0;
1382         }
1383 }
1384
1385 /**
1386  * cik_mm_wdoorbell - write a doorbell dword
1387  *
1388  * @rdev: radeon_device pointer
1389  * @offset: byte offset into the aperture
1390  * @v: value to write
1391  *
1392  * Writes @v to the doorbell aperture at the
1393  * requested offset (CIK).
1394  */
1395 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1396 {
1397         if (offset < rdev->doorbell.size) {
1398                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1399         } else {
1400                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1401         }
1402 }
1403
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io register init table: each row is an {index, data} pair
 * written via MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by
 * ci_mc_load_microcode() before the MC ucode itself is loaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1445
1446 /**
1447  * cik_srbm_select - select specific register instances
1448  *
1449  * @rdev: radeon_device pointer
1450  * @me: selected ME (micro engine)
1451  * @pipe: pipe
1452  * @queue: queue
1453  * @vmid: VMID
1454  *
1455  * Switches the currently active registers instances.  Some
1456  * registers are instanced per VMID, others are instanced per
1457  * me/pipe/queue combination.
1458  */
1459 static void cik_srbm_select(struct radeon_device *rdev,
1460                             u32 me, u32 pipe, u32 queue, u32 vmid)
1461 {
1462         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1463                              MEID(me & 0x3) |
1464                              VMID(vmid & 0xf) |
1465                              QUEUEID(queue & 0x7));
1466         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1467 }
1468
1469 /* ucode loading */
1470 /**
1471  * ci_mc_load_microcode - load MC ucode into the hw
1472  *
1473  * @rdev: radeon_device pointer
1474  *
1475  * Load the GDDR MC ucode into the hw (CIK).
1476  * Returns 0 on success, error on failure.
1477  */
1478 static int ci_mc_load_microcode(struct radeon_device *rdev)
1479 {
1480         const __be32 *fw_data;
1481         u32 running, blackout = 0;
1482         u32 *io_mc_regs;
1483         int i, ucode_size, regs_size;
1484
1485         if (!rdev->mc_fw)
1486                 return -EINVAL;
1487
1488         switch (rdev->family) {
1489         case CHIP_BONAIRE:
1490         default:
1491                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1492                 ucode_size = CIK_MC_UCODE_SIZE;
1493                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1494                 break;
1495         }
1496
1497         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1498
1499         if (running == 0) {
1500                 if (running) {
1501                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1502                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1503                 }
1504
1505                 /* reset the engine and set to writable */
1506                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1507                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1508
1509                 /* load mc io regs */
1510                 for (i = 0; i < regs_size; i++) {
1511                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1512                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1513                 }
1514                 /* load the MC ucode */
1515                 fw_data = (const __be32 *)rdev->mc_fw->data;
1516                 for (i = 0; i < ucode_size; i++)
1517                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1518
1519                 /* put the engine back into the active state */
1520                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1521                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1522                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1523
1524                 /* wait for training to complete */
1525                 for (i = 0; i < rdev->usec_timeout; i++) {
1526                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1527                                 break;
1528                         udelay(1);
1529                 }
1530                 for (i = 0; i < rdev->usec_timeout; i++) {
1531                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1532                                 break;
1533                         udelay(1);
1534                 }
1535
1536                 if (running)
1537                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1538         }
1539
1540         return 0;
1541 }
1542
1543 /**
1544  * cik_init_microcode - load ucode images from disk
1545  *
1546  * @rdev: radeon_device pointer
1547  *
1548  * Use the firmware interface to load the ucode images into
1549  * the driver (not loaded into hw).
1550  * Returns 0 on success, error on failure.
1551  */
1552 static int cik_init_microcode(struct radeon_device *rdev)
1553 {
1554         const char *chip_name;
1555         size_t pfp_req_size, me_req_size, ce_req_size,
1556                 mec_req_size, rlc_req_size, mc_req_size,
1557                 sdma_req_size, smc_req_size;
1558         char fw_name[30];
1559         int err;
1560
1561         DRM_DEBUG("\n");
1562
1563         switch (rdev->family) {
1564         case CHIP_BONAIRE:
1565                 chip_name = "BONAIRE";
1566                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1567                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1568                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1569                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1570                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1571                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1572                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1573                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1574                 break;
1575         case CHIP_KAVERI:
1576                 chip_name = "KAVERI";
1577                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1579                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1582                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1583                 break;
1584         case CHIP_KABINI:
1585                 chip_name = "KABINI";
1586                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1587                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1588                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1589                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1590                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1591                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1592                 break;
1593         default: BUG();
1594         }
1595
1596         DRM_INFO("Loading %s Microcode\n", chip_name);
1597
1598         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1599         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1600         if (err)
1601                 goto out;
1602         if (rdev->pfp_fw->size != pfp_req_size) {
1603                 printk(KERN_ERR
1604                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1605                        rdev->pfp_fw->size, fw_name);
1606                 err = -EINVAL;
1607                 goto out;
1608         }
1609
1610         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1611         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1612         if (err)
1613                 goto out;
1614         if (rdev->me_fw->size != me_req_size) {
1615                 printk(KERN_ERR
1616                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1617                        rdev->me_fw->size, fw_name);
1618                 err = -EINVAL;
1619         }
1620
1621         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1622         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1623         if (err)
1624                 goto out;
1625         if (rdev->ce_fw->size != ce_req_size) {
1626                 printk(KERN_ERR
1627                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628                        rdev->ce_fw->size, fw_name);
1629                 err = -EINVAL;
1630         }
1631
1632         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1633         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1634         if (err)
1635                 goto out;
1636         if (rdev->mec_fw->size != mec_req_size) {
1637                 printk(KERN_ERR
1638                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639                        rdev->mec_fw->size, fw_name);
1640                 err = -EINVAL;
1641         }
1642
1643         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1644         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1645         if (err)
1646                 goto out;
1647         if (rdev->rlc_fw->size != rlc_req_size) {
1648                 printk(KERN_ERR
1649                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1650                        rdev->rlc_fw->size, fw_name);
1651                 err = -EINVAL;
1652         }
1653
1654         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1655         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1656         if (err)
1657                 goto out;
1658         if (rdev->sdma_fw->size != sdma_req_size) {
1659                 printk(KERN_ERR
1660                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1661                        rdev->sdma_fw->size, fw_name);
1662                 err = -EINVAL;
1663         }
1664
1665         /* No SMC, MC ucode on APUs */
1666         if (!(rdev->flags & RADEON_IS_IGP)) {
1667                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1668                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1669                 if (err)
1670                         goto out;
1671                 if (rdev->mc_fw->size != mc_req_size) {
1672                         printk(KERN_ERR
1673                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1674                                rdev->mc_fw->size, fw_name);
1675                         err = -EINVAL;
1676                 }
1677
1678                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1679                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1680                 if (err) {
1681                         printk(KERN_ERR
1682                                "smc: error loading firmware \"%s\"\n",
1683                                fw_name);
1684                         release_firmware(rdev->smc_fw);
1685                         rdev->smc_fw = NULL;
1686                 } else if (rdev->smc_fw->size != smc_req_size) {
1687                         printk(KERN_ERR
1688                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1689                                rdev->smc_fw->size, fw_name);
1690                         err = -EINVAL;
1691                 }
1692         }
1693
1694 out:
1695         if (err) {
1696                 if (err != -EINVAL)
1697                         printk(KERN_ERR
1698                                "cik_cp: Failed to load firmware \"%s\"\n",
1699                                fw_name);
1700                 release_firmware(rdev->pfp_fw);
1701                 rdev->pfp_fw = NULL;
1702                 release_firmware(rdev->me_fw);
1703                 rdev->me_fw = NULL;
1704                 release_firmware(rdev->ce_fw);
1705                 rdev->ce_fw = NULL;
1706                 release_firmware(rdev->rlc_fw);
1707                 rdev->rlc_fw = NULL;
1708                 release_firmware(rdev->mc_fw);
1709                 rdev->mc_fw = NULL;
1710                 release_firmware(rdev->smc_fw);
1711                 rdev->smc_fw = NULL;
1712         }
1713         return err;
1714 }
1715
1716 /*
1717  * Core functions
1718  */
1719 /**
1720  * cik_tiling_mode_table_init - init the hw tiling table
1721  *
1722  * @rdev: radeon_device pointer
1723  *
1724  * Starting with SI, the tiling setup is done globally in a
1725  * set of 32 tiling modes.  Rather than selecting each set of
1726  * parameters per surface as on older ASICs, CIK just selects
1727  * which index in the tiling table to use, and the surface
1728  * uses those parameters.
1729  */
1730 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1731 {
1732         const u32 num_tile_mode_states = 32;
1733         const u32 num_secondary_tile_mode_states = 16;
1734         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1735         u32 num_pipe_configs;
1736         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1737                 rdev->config.cik.max_shader_engines;
1738
1739         switch (rdev->config.cik.mem_row_size_in_kb) {
1740         case 1:
1741                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1742                 break;
1743         case 2:
1744         default:
1745                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1746                 break;
1747         case 4:
1748                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1749                 break;
1750         }
1751
1752         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1753         if (num_pipe_configs > 8)
1754                 num_pipe_configs = 8; /* ??? */
1755
1756         if (num_pipe_configs == 8) {
1757                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1758                         switch (reg_offset) {
1759                         case 0:
1760                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1761                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1762                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1763                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1764                                 break;
1765                         case 1:
1766                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1767                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1768                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1769                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1770                                 break;
1771                         case 2:
1772                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1773                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1774                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1775                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1776                                 break;
1777                         case 3:
1778                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1779                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1780                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1781                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1782                                 break;
1783                         case 4:
1784                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1785                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1786                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1787                                                  TILE_SPLIT(split_equal_to_row_size));
1788                                 break;
1789                         case 5:
1790                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1791                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1792                                 break;
1793                         case 6:
1794                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1795                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1796                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1797                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1798                                 break;
1799                         case 7:
1800                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1801                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1802                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1803                                                  TILE_SPLIT(split_equal_to_row_size));
1804                                 break;
1805                         case 8:
1806                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1807                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1808                                 break;
1809                         case 9:
1810                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1811                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1812                                 break;
1813                         case 10:
1814                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1815                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1816                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1817                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1818                                 break;
1819                         case 11:
1820                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1821                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1822                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1823                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1824                                 break;
1825                         case 12:
1826                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1827                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1828                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1829                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1830                                 break;
1831                         case 13:
1832                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1833                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1834                                 break;
1835                         case 14:
1836                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1837                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1838                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1839                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1840                                 break;
1841                         case 16:
1842                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1843                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1844                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1845                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1846                                 break;
1847                         case 17:
1848                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1849                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1850                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1851                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1852                                 break;
1853                         case 27:
1854                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1855                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1856                                 break;
1857                         case 28:
1858                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1860                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1861                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1862                                 break;
1863                         case 29:
1864                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1865                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1866                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1867                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1868                                 break;
1869                         case 30:
1870                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1871                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1872                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1873                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1874                                 break;
1875                         default:
1876                                 gb_tile_moden = 0;
1877                                 break;
1878                         }
1879                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1880                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1881                 }
1882                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1883                         switch (reg_offset) {
1884                         case 0:
1885                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1886                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1887                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1888                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1889                                 break;
1890                         case 1:
1891                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1892                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1893                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1894                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1895                                 break;
1896                         case 2:
1897                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1898                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1899                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1900                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1901                                 break;
1902                         case 3:
1903                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1904                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1905                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1906                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1907                                 break;
1908                         case 4:
1909                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1910                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1911                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1912                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1913                                 break;
1914                         case 5:
1915                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1916                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1917                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1918                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1919                                 break;
1920                         case 6:
1921                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1922                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1923                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1924                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1925                                 break;
1926                         case 8:
1927                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1929                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1930                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1931                                 break;
1932                         case 9:
1933                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1934                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1935                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1936                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1937                                 break;
1938                         case 10:
1939                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1940                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1941                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1942                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1943                                 break;
1944                         case 11:
1945                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1946                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1947                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1948                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1949                                 break;
1950                         case 12:
1951                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1953                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1954                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1955                                 break;
1956                         case 13:
1957                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1958                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1959                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1960                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1961                                 break;
1962                         case 14:
1963                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1964                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1965                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1966                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1967                                 break;
1968                         default:
1969                                 gb_tile_moden = 0;
1970                                 break;
1971                         }
1972                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1973                 }
1974         } else if (num_pipe_configs == 4) {
1975                 if (num_rbs == 4) {
1976                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1977                                 switch (reg_offset) {
1978                                 case 0:
1979                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1981                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1982                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1983                                         break;
1984                                 case 1:
1985                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1986                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1987                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1988                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1989                                         break;
1990                                 case 2:
1991                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1995                                         break;
1996                                 case 3:
1997                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2001                                         break;
2002                                 case 4:
2003                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006                                                          TILE_SPLIT(split_equal_to_row_size));
2007                                         break;
2008                                 case 5:
2009                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2010                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2011                                         break;
2012                                 case 6:
2013                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2014                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2015                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2016                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2017                                         break;
2018                                 case 7:
2019                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2020                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2022                                                          TILE_SPLIT(split_equal_to_row_size));
2023                                         break;
2024                                 case 8:
2025                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2026                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2027                                         break;
2028                                 case 9:
2029                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2030                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2031                                         break;
2032                                 case 10:
2033                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2035                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2036                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2037                                         break;
2038                                 case 11:
2039                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2040                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2041                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2042                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2043                                         break;
2044                                 case 12:
2045                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2046                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2049                                         break;
2050                                 case 13:
2051                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2053                                         break;
2054                                 case 14:
2055                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2057                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2058                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2059                                         break;
2060                                 case 16:
2061                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2062                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2063                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2064                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2065                                         break;
2066                                 case 17:
2067                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2068                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071                                         break;
2072                                 case 27:
2073                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2075                                         break;
2076                                 case 28:
2077                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2078                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2079                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2080                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081                                         break;
2082                                 case 29:
2083                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2085                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2086                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087                                         break;
2088                                 case 30:
2089                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093                                         break;
2094                                 default:
2095                                         gb_tile_moden = 0;
2096                                         break;
2097                                 }
2098                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2099                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2100                         }
2101                 } else if (num_rbs < 4) {
2102                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2103                                 switch (reg_offset) {
2104                                 case 0:
2105                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2106                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2107                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2108                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2109                                         break;
2110                                 case 1:
2111                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2113                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2114                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2115                                         break;
2116                                 case 2:
2117                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2121                                         break;
2122                                 case 3:
2123                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2127                                         break;
2128                                 case 4:
2129                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132                                                          TILE_SPLIT(split_equal_to_row_size));
2133                                         break;
2134                                 case 5:
2135                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2136                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2137                                         break;
2138                                 case 6:
2139                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2140                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2141                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2142                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2143                                         break;
2144                                 case 7:
2145                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2146                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2147                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2148                                                          TILE_SPLIT(split_equal_to_row_size));
2149                                         break;
2150                                 case 8:
2151                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2152                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2153                                         break;
2154                                 case 9:
2155                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2156                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2157                                         break;
2158                                 case 10:
2159                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2161                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2162                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2163                                         break;
2164                                 case 11:
2165                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2166                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2168                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2169                                         break;
2170                                 case 12:
2171                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2172                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2175                                         break;
2176                                 case 13:
2177                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2179                                         break;
2180                                 case 14:
2181                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2184                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2185                                         break;
2186                                 case 16:
2187                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2188                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2190                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2191                                         break;
2192                                 case 17:
2193                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2194                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197                                         break;
2198                                 case 27:
2199                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2200                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2201                                         break;
2202                                 case 28:
2203                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2204                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2205                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2206                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2207                                         break;
2208                                 case 29:
2209                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2210                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2212                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2213                                         break;
2214                                 case 30:
2215                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2219                                         break;
2220                                 default:
2221                                         gb_tile_moden = 0;
2222                                         break;
2223                                 }
2224                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2225                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2226                         }
2227                 }
2228                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2229                         switch (reg_offset) {
2230                         case 0:
2231                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2234                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2235                                 break;
2236                         case 1:
2237                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2240                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2241                                 break;
2242                         case 2:
2243                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2247                                 break;
2248                         case 3:
2249                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2251                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2252                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2253                                 break;
2254                         case 4:
2255                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2259                                 break;
2260                         case 5:
2261                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2265                                 break;
2266                         case 6:
2267                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2270                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2271                                 break;
2272                         case 8:
2273                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2275                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2277                                 break;
2278                         case 9:
2279                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2280                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2281                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2283                                 break;
2284                         case 10:
2285                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2289                                 break;
2290                         case 11:
2291                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2295                                 break;
2296                         case 12:
2297                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2300                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2301                                 break;
2302                         case 13:
2303                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2307                                 break;
2308                         case 14:
2309                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2312                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2313                                 break;
2314                         default:
2315                                 gb_tile_moden = 0;
2316                                 break;
2317                         }
2318                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2319                 }
2320         } else if (num_pipe_configs == 2) {
2321                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2322                         switch (reg_offset) {
2323                         case 0:
2324                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2326                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2327                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2328                                 break;
2329                         case 1:
2330                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2332                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2333                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2334                                 break;
2335                         case 2:
2336                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2339                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2340                                 break;
2341                         case 3:
2342                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2345                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2346                                 break;
2347                         case 4:
2348                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2351                                                  TILE_SPLIT(split_equal_to_row_size));
2352                                 break;
2353                         case 5:
2354                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2355                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2356                                 break;
2357                         case 6:
2358                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2359                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2360                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2361                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2362                                 break;
2363                         case 7:
2364                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2365                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2366                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2367                                                  TILE_SPLIT(split_equal_to_row_size));
2368                                 break;
2369                         case 8:
2370                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2371                                 break;
2372                         case 9:
2373                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2375                                 break;
2376                         case 10:
2377                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2380                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                                 break;
2382                         case 11:
2383                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2386                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387                                 break;
2388                         case 12:
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2392                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393                                 break;
2394                         case 13:
2395                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2396                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2397                                 break;
2398                         case 14:
2399                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2402                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2403                                 break;
2404                         case 16:
2405                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2406                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2407                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2408                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409                                 break;
2410                         case 17:
2411                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2412                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2414                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415                                 break;
2416                         case 27:
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2419                                 break;
2420                         case 28:
2421                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2422                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2424                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                                 break;
2426                         case 29:
2427                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2430                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                                 break;
2432                         case 30:
2433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2436                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                                 break;
2438                         default:
2439                                 gb_tile_moden = 0;
2440                                 break;
2441                         }
2442                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2443                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2444                 }
2445                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2446                         switch (reg_offset) {
2447                         case 0:
2448                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2449                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2451                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2452                                 break;
2453                         case 1:
2454                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2455                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2457                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2458                                 break;
2459                         case 2:
2460                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2464                                 break;
2465                         case 3:
2466                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2470                                 break;
2471                         case 4:
2472                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2476                                 break;
2477                         case 5:
2478                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2482                                 break;
2483                         case 6:
2484                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2488                                 break;
2489                         case 8:
2490                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2491                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2492                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2494                                 break;
2495                         case 9:
2496                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2497                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2498                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2499                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2500                                 break;
2501                         case 10:
2502                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2503                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2506                                 break;
2507                         case 11:
2508                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2509                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2510                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2512                                 break;
2513                         case 12:
2514                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2516                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2518                                 break;
2519                         case 13:
2520                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2522                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2524                                 break;
2525                         case 14:
2526                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2530                                 break;
2531                         default:
2532                                 gb_tile_moden = 0;
2533                                 break;
2534                         }
2535                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2536                 }
2537         } else
2538                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2539 }
2540
2541 /**
2542  * cik_select_se_sh - select which SE, SH to address
2543  *
2544  * @rdev: radeon_device pointer
2545  * @se_num: shader engine to address
2546  * @sh_num: sh block to address
2547  *
2548  * Select which SE, SH combinations to address. Certain
2549  * registers are instanced per SE or SH.  0xffffffff means
2550  * broadcast to all SEs or SHs (CIK).
2551  */
2552 static void cik_select_se_sh(struct radeon_device *rdev,
2553                              u32 se_num, u32 sh_num)
2554 {
2555         u32 data = INSTANCE_BROADCAST_WRITES;
2556
2557         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2558                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2559         else if (se_num == 0xffffffff)
2560                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2561         else if (sh_num == 0xffffffff)
2562                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2563         else
2564                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2565         WREG32(GRBM_GFX_INDEX, data);
2566 }
2567
2568 /**
2569  * cik_create_bitmask - create a bitmask
2570  *
2571  * @bit_width: length of the mask
2572  *
2573  * create a variable length bit mask (CIK).
2574  * Returns the bitmask.
2575  */
2576 static u32 cik_create_bitmask(u32 bit_width)
2577 {
2578         u32 i, mask = 0;
2579
2580         for (i = 0; i < bit_width; i++) {
2581                 mask <<= 1;
2582                 mask |= 1;
2583         }
2584         return mask;
2585 }
2586
2587 /**
2588  * cik_select_se_sh - select which SE, SH to address
2589  *
2590  * @rdev: radeon_device pointer
2591  * @max_rb_num: max RBs (render backends) for the asic
2592  * @se_num: number of SEs (shader engines) for the asic
2593  * @sh_per_se: number of SH blocks per SE for the asic
2594  *
2595  * Calculates the bitmask of disabled RBs (CIK).
2596  * Returns the disabled RB bitmask.
2597  */
2598 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2599                               u32 max_rb_num, u32 se_num,
2600                               u32 sh_per_se)
2601 {
2602         u32 data, mask;
2603
2604         data = RREG32(CC_RB_BACKEND_DISABLE);
2605         if (data & 1)
2606                 data &= BACKEND_DISABLE_MASK;
2607         else
2608                 data = 0;
2609         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2610
2611         data >>= BACKEND_DISABLE_SHIFT;
2612
2613         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2614
2615         return data & mask;
2616 }
2617
2618 /**
2619  * cik_setup_rb - setup the RBs on the asic
2620  *
2621  * @rdev: radeon_device pointer
2622  * @se_num: number of SEs (shader engines) for the asic
2623  * @sh_per_se: number of SH blocks per SE for the asic
2624  * @max_rb_num: max RBs (render backends) for the asic
2625  *
2626  * Configures per-SE/SH RB registers (CIK).
2627  */
2628 static void cik_setup_rb(struct radeon_device *rdev,
2629                          u32 se_num, u32 sh_per_se,
2630                          u32 max_rb_num)
2631 {
2632         int i, j;
2633         u32 data, mask;
2634         u32 disabled_rbs = 0;
2635         u32 enabled_rbs = 0;
2636
2637         for (i = 0; i < se_num; i++) {
2638                 for (j = 0; j < sh_per_se; j++) {
2639                         cik_select_se_sh(rdev, i, j);
2640                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2641                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2642                 }
2643         }
2644         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2645
2646         mask = 1;
2647         for (i = 0; i < max_rb_num; i++) {
2648                 if (!(disabled_rbs & mask))
2649                         enabled_rbs |= mask;
2650                 mask <<= 1;
2651         }
2652
2653         for (i = 0; i < se_num; i++) {
2654                 cik_select_se_sh(rdev, i, 0xffffffff);
2655                 data = 0;
2656                 for (j = 0; j < sh_per_se; j++) {
2657                         switch (enabled_rbs & 3) {
2658                         case 1:
2659                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2660                                 break;
2661                         case 2:
2662                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2663                                 break;
2664                         case 3:
2665                         default:
2666                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2667                                 break;
2668                         }
2669                         enabled_rbs >>= 2;
2670                 }
2671                 WREG32(PA_SC_RASTER_CONFIG, data);
2672         }
2673         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2674 }
2675
2676 /**
2677  * cik_gpu_init - setup the 3D engine
2678  *
2679  * @rdev: radeon_device pointer
2680  *
2681  * Configures the 3D engine and tiling configuration
2682  * registers so that the 3D engine is usable.
2683  */
2684 static void cik_gpu_init(struct radeon_device *rdev)
2685 {
2686         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2687         u32 mc_shared_chmap, mc_arb_ramcfg;
2688         u32 hdp_host_path_cntl;
2689         u32 tmp;
2690         int i, j;
2691
2692         switch (rdev->family) {
2693         case CHIP_BONAIRE:
2694                 rdev->config.cik.max_shader_engines = 2;
2695                 rdev->config.cik.max_tile_pipes = 4;
2696                 rdev->config.cik.max_cu_per_sh = 7;
2697                 rdev->config.cik.max_sh_per_se = 1;
2698                 rdev->config.cik.max_backends_per_se = 2;
2699                 rdev->config.cik.max_texture_channel_caches = 4;
2700                 rdev->config.cik.max_gprs = 256;
2701                 rdev->config.cik.max_gs_threads = 32;
2702                 rdev->config.cik.max_hw_contexts = 8;
2703
2704                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2705                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2706                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2707                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2708                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2709                 break;
2710         case CHIP_KAVERI:
2711                 /* TODO */
2712                 break;
2713         case CHIP_KABINI:
2714         default:
2715                 rdev->config.cik.max_shader_engines = 1;
2716                 rdev->config.cik.max_tile_pipes = 2;
2717                 rdev->config.cik.max_cu_per_sh = 2;
2718                 rdev->config.cik.max_sh_per_se = 1;
2719                 rdev->config.cik.max_backends_per_se = 1;
2720                 rdev->config.cik.max_texture_channel_caches = 2;
2721                 rdev->config.cik.max_gprs = 256;
2722                 rdev->config.cik.max_gs_threads = 16;
2723                 rdev->config.cik.max_hw_contexts = 8;
2724
2725                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2726                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2727                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2728                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2729                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2730                 break;
2731         }
2732
2733         /* Initialize HDP */
2734         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2735                 WREG32((0x2c14 + j), 0x00000000);
2736                 WREG32((0x2c18 + j), 0x00000000);
2737                 WREG32((0x2c1c + j), 0x00000000);
2738                 WREG32((0x2c20 + j), 0x00000000);
2739                 WREG32((0x2c24 + j), 0x00000000);
2740         }
2741
2742         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2743
2744         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2745
2746         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2747         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2748
2749         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2750         rdev->config.cik.mem_max_burst_length_bytes = 256;
2751         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2752         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2753         if (rdev->config.cik.mem_row_size_in_kb > 4)
2754                 rdev->config.cik.mem_row_size_in_kb = 4;
2755         /* XXX use MC settings? */
2756         rdev->config.cik.shader_engine_tile_size = 32;
2757         rdev->config.cik.num_gpus = 1;
2758         rdev->config.cik.multi_gpu_tile_size = 64;
2759
2760         /* fix up row size */
2761         gb_addr_config &= ~ROW_SIZE_MASK;
2762         switch (rdev->config.cik.mem_row_size_in_kb) {
2763         case 1:
2764         default:
2765                 gb_addr_config |= ROW_SIZE(0);
2766                 break;
2767         case 2:
2768                 gb_addr_config |= ROW_SIZE(1);
2769                 break;
2770         case 4:
2771                 gb_addr_config |= ROW_SIZE(2);
2772                 break;
2773         }
2774
2775         /* setup tiling info dword.  gb_addr_config is not adequate since it does
2776          * not have bank info, so create a custom tiling dword.
2777          * bits 3:0   num_pipes
2778          * bits 7:4   num_banks
2779          * bits 11:8  group_size
2780          * bits 15:12 row_size
2781          */
2782         rdev->config.cik.tile_config = 0;
2783         switch (rdev->config.cik.num_tile_pipes) {
2784         case 1:
2785                 rdev->config.cik.tile_config |= (0 << 0);
2786                 break;
2787         case 2:
2788                 rdev->config.cik.tile_config |= (1 << 0);
2789                 break;
2790         case 4:
2791                 rdev->config.cik.tile_config |= (2 << 0);
2792                 break;
2793         case 8:
2794         default:
2795                 /* XXX what about 12? */
2796                 rdev->config.cik.tile_config |= (3 << 0);
2797                 break;
2798         }
2799         if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2800                 rdev->config.cik.tile_config |= 1 << 4;
2801         else
2802                 rdev->config.cik.tile_config |= 0 << 4;
2803         rdev->config.cik.tile_config |=
2804                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2805         rdev->config.cik.tile_config |=
2806                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2807
2808         WREG32(GB_ADDR_CONFIG, gb_addr_config);
2809         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2810         WREG32(DMIF_ADDR_CALC, gb_addr_config);
2811         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2812         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2813         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2814         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2815         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2816
2817         cik_tiling_mode_table_init(rdev);
2818
2819         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2820                      rdev->config.cik.max_sh_per_se,
2821                      rdev->config.cik.max_backends_per_se);
2822
2823         /* set HW defaults for 3D engine */
2824         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2825
2826         WREG32(SX_DEBUG_1, 0x20);
2827
2828         WREG32(TA_CNTL_AUX, 0x00010000);
2829
2830         tmp = RREG32(SPI_CONFIG_CNTL);
2831         tmp |= 0x03000000;
2832         WREG32(SPI_CONFIG_CNTL, tmp);
2833
2834         WREG32(SQ_CONFIG, 1);
2835
2836         WREG32(DB_DEBUG, 0);
2837
2838         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2839         tmp |= 0x00000400;
2840         WREG32(DB_DEBUG2, tmp);
2841
2842         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2843         tmp |= 0x00020200;
2844         WREG32(DB_DEBUG3, tmp);
2845
2846         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2847         tmp |= 0x00018208;
2848         WREG32(CB_HW_CONTROL, tmp);
2849
2850         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2851
2852         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2853                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2854                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2855                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2856
2857         WREG32(VGT_NUM_INSTANCES, 1);
2858
2859         WREG32(CP_PERFMON_CNTL, 0);
2860
2861         WREG32(SQ_CONFIG, 0);
2862
2863         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2864                                           FORCE_EOV_MAX_REZ_CNT(255)));
2865
2866         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2867                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2868
2869         WREG32(VGT_GS_VERTEX_REUSE, 16);
2870         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2871
2872         tmp = RREG32(HDP_MISC_CNTL);
2873         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2874         WREG32(HDP_MISC_CNTL, tmp);
2875
2876         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2877         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2878
2879         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2880         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2881
2882         udelay(50);
2883 }
2884
2885 /*
2886  * GPU scratch registers helpers function.
2887  */
2888 /**
2889  * cik_scratch_init - setup driver info for CP scratch regs
2890  *
2891  * @rdev: radeon_device pointer
2892  *
2893  * Set up the number and offset of the CP scratch registers.
2894  * NOTE: use of CP scratch registers is a legacy inferface and
2895  * is not used by default on newer asics (r6xx+).  On newer asics,
2896  * memory buffers are used for fences rather than scratch regs.
2897  */
2898 static void cik_scratch_init(struct radeon_device *rdev)
2899 {
2900         int i;
2901
2902         rdev->scratch.num_reg = 7;
2903         rdev->scratch.reg_base = SCRATCH_REG0;
2904         for (i = 0; i < rdev->scratch.num_reg; i++) {
2905                 rdev->scratch.free[i] = true;
2906                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2907         }
2908 }
2909
2910 /**
2911  * cik_ring_test - basic gfx ring test
2912  *
2913  * @rdev: radeon_device pointer
2914  * @ring: radeon_ring structure holding ring information
2915  *
2916  * Allocate a scratch register and write to it using the gfx ring (CIK).
2917  * Provides a basic gfx ring test to verify that the ring is working.
2918  * Used by cik_cp_gfx_resume();
2919  * Returns 0 on success, error on failure.
2920  */
2921 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2922 {
2923         uint32_t scratch;
2924         uint32_t tmp = 0;
2925         unsigned i;
2926         int r;
2927
2928         r = radeon_scratch_get(rdev, &scratch);
2929         if (r) {
2930                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2931                 return r;
2932         }
2933         WREG32(scratch, 0xCAFEDEAD);
2934         r = radeon_ring_lock(rdev, ring, 3);
2935         if (r) {
2936                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2937                 radeon_scratch_free(rdev, scratch);
2938                 return r;
2939         }
2940         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2941         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2942         radeon_ring_write(ring, 0xDEADBEEF);
2943         radeon_ring_unlock_commit(rdev, ring);
2944
2945         for (i = 0; i < rdev->usec_timeout; i++) {
2946                 tmp = RREG32(scratch);
2947                 if (tmp == 0xDEADBEEF)
2948                         break;
2949                 DRM_UDELAY(1);
2950         }
2951         if (i < rdev->usec_timeout) {
2952                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2953         } else {
2954                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2955                           ring->idx, scratch, tmp);
2956                 r = -EINVAL;
2957         }
2958         radeon_scratch_free(rdev, scratch);
2959         return r;
2960 }
2961
2962 /**
2963  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2964  *
2965  * @rdev: radeon_device pointer
2966  * @fence: radeon fence object
2967  *
2968  * Emits a fence sequnce number on the gfx ring and flushes
2969  * GPU caches.
2970  */
2971 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2972                              struct radeon_fence *fence)
2973 {
2974         struct radeon_ring *ring = &rdev->ring[fence->ring];
2975         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2976
2977         /* EVENT_WRITE_EOP - flush caches, send int */
2978         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2979         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2980                                  EOP_TC_ACTION_EN |
2981                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2982                                  EVENT_INDEX(5)));
2983         radeon_ring_write(ring, addr & 0xfffffffc);
2984         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2985         radeon_ring_write(ring, fence->seq);
2986         radeon_ring_write(ring, 0);
2987         /* HDP flush */
2988         /* We should be using the new WAIT_REG_MEM special op packet here
2989          * but it causes the CP to hang
2990          */
2991         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2992         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2993                                  WRITE_DATA_DST_SEL(0)));
2994         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2995         radeon_ring_write(ring, 0);
2996         radeon_ring_write(ring, 0);
2997 }
2998
2999 /**
3000  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3001  *
3002  * @rdev: radeon_device pointer
3003  * @fence: radeon fence object
3004  *
3005  * Emits a fence sequnce number on the compute ring and flushes
3006  * GPU caches.
3007  */
3008 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3009                                  struct radeon_fence *fence)
3010 {
3011         struct radeon_ring *ring = &rdev->ring[fence->ring];
3012         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3013
3014         /* RELEASE_MEM - flush caches, send int */
3015         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3016         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3017                                  EOP_TC_ACTION_EN |
3018                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3019                                  EVENT_INDEX(5)));
3020         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3021         radeon_ring_write(ring, addr & 0xfffffffc);
3022         radeon_ring_write(ring, upper_32_bits(addr));
3023         radeon_ring_write(ring, fence->seq);
3024         radeon_ring_write(ring, 0);
3025         /* HDP flush */
3026         /* We should be using the new WAIT_REG_MEM special op packet here
3027          * but it causes the CP to hang
3028          */
3029         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3030         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3031                                  WRITE_DATA_DST_SEL(0)));
3032         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3033         radeon_ring_write(ring, 0);
3034         radeon_ring_write(ring, 0);
3035 }
3036
/**
 * cik_semaphore_ring_emit - emit a semaphore command on a ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring to emit the packet on
 * @semaphore: radeon semaphore object
 * @emit_wait: true to emit a semaphore wait, false to emit a signal
 *
 * Emits a MEM_SEMAPHORE packet that either waits on or signals the
 * semaphore at @semaphore->gpu_addr (CIK).
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* upper address bits limited to 16; select bits share this dword */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
3049
3050 /*
3051  * IB stuff
3052  */
3053 /**
3054  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3055  *
3056  * @rdev: radeon_device pointer
3057  * @ib: radeon indirect buffer object
3058  *
3059  * Emits an DE (drawing engine) or CE (constant engine) IB
3060  * on the gfx ring.  IBs are usually generated by userspace
3061  * acceleration drivers and submitted to the kernel for
3062  * sheduling on the ring.  This function schedules the IB
3063  * on the gfx ring for execution by the GPU.
3064  */
3065 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3066 {
3067         struct radeon_ring *ring = &rdev->ring[ib->ring];
3068         u32 header, control = INDIRECT_BUFFER_VALID;
3069
3070         if (ib->is_const_ib) {
3071                 /* set switch buffer packet before const IB */
3072                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3073                 radeon_ring_write(ring, 0);
3074
3075                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3076         } else {
3077                 u32 next_rptr;
3078                 if (ring->rptr_save_reg) {
3079                         next_rptr = ring->wptr + 3 + 4;
3080                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3081                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3082                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3083                         radeon_ring_write(ring, next_rptr);
3084                 } else if (rdev->wb.enabled) {
3085                         next_rptr = ring->wptr + 5 + 4;
3086                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3087                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3088                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3089                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3090                         radeon_ring_write(ring, next_rptr);
3091                 }
3092
3093                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3094         }
3095
3096         control |= ib->length_dw |
3097                 (ib->vm ? (ib->vm->id << 24) : 0);
3098
3099         radeon_ring_write(ring, header);
3100         radeon_ring_write(ring,
3101 #ifdef __BIG_ENDIAN
3102                           (2 << 0) |
3103 #endif
3104                           (ib->gpu_addr & 0xFFFFFFFC));
3105         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3106         radeon_ring_write(ring, control);
3107 }
3108
3109 /**
3110  * cik_ib_test - basic gfx ring IB test
3111  *
3112  * @rdev: radeon_device pointer
3113  * @ring: radeon_ring structure holding ring information
3114  *
3115  * Allocate an IB and execute it on the gfx ring (CIK).
3116  * Provides a basic gfx ring test to verify that IBs are working.
3117  * Returns 0 on success, error on failure.
3118  */
3119 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3120 {
3121         struct radeon_ib ib;
3122         uint32_t scratch;
3123         uint32_t tmp = 0;
3124         unsigned i;
3125         int r;
3126
3127         r = radeon_scratch_get(rdev, &scratch);
3128         if (r) {
3129                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3130                 return r;
3131         }
3132         WREG32(scratch, 0xCAFEDEAD);
3133         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3134         if (r) {
3135                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3136                 return r;
3137         }
3138         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3139         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3140         ib.ptr[2] = 0xDEADBEEF;
3141         ib.length_dw = 3;
3142         r = radeon_ib_schedule(rdev, &ib, NULL);
3143         if (r) {
3144                 radeon_scratch_free(rdev, scratch);
3145                 radeon_ib_free(rdev, &ib);
3146                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3147                 return r;
3148         }
3149         r = radeon_fence_wait(ib.fence, false);
3150         if (r) {
3151                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3152                 return r;
3153         }
3154         for (i = 0; i < rdev->usec_timeout; i++) {
3155                 tmp = RREG32(scratch);
3156                 if (tmp == 0xDEADBEEF)
3157                         break;
3158                 DRM_UDELAY(1);
3159         }
3160         if (i < rdev->usec_timeout) {
3161                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3162         } else {
3163                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3164                           scratch, tmp);
3165                 r = -EINVAL;
3166         }
3167         radeon_scratch_free(rdev, scratch);
3168         radeon_ib_free(rdev, &ib);
3169         return r;
3170 }
3171
3172 /*
3173  * CP.
3174  * On CIK, gfx and compute now have independant command processors.
3175  *
3176  * GFX
3177  * Gfx consists of a single ring and can process both gfx jobs and
3178  * compute jobs.  The gfx CP consists of three microengines (ME):
3179  * PFP - Pre-Fetch Parser
3180  * ME - Micro Engine
3181  * CE - Constant Engine
3182  * The PFP and ME make up what is considered the Drawing Engine (DE).
3183  * The CE is an asynchronous engine used for updating buffer desciptors
3184  * used by the DE so that they can be loaded into cache in parallel
3185  * while the DE is processing state update packets.
3186  *
3187  * Compute
3188  * The compute CP consists of two microengines (ME):
3189  * MEC1 - Compute MicroEngine 1
3190  * MEC2 - Compute MicroEngine 2
3191  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3192  * The queues are exposed to userspace and are programmed directly
3193  * by the compute runtime.
3194  */
3195 /**
3196  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3197  *
3198  * @rdev: radeon_device pointer
3199  * @enable: enable or disable the MEs
3200  *
3201  * Halts or unhalts the gfx MEs.
3202  */
3203 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3204 {
3205         if (enable)
3206                 WREG32(CP_ME_CNTL, 0);
3207         else {
3208                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3209                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3210         }
3211         udelay(50);
3212 }
3213
3214 /**
3215  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3216  *
3217  * @rdev: radeon_device pointer
3218  *
3219  * Loads the gfx PFP, ME, and CE ucode.
3220  * Returns 0 for success, -EINVAL if the ucode is not available.
3221  */
3222 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3223 {
3224         const __be32 *fw_data;
3225         int i;
3226
3227         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3228                 return -EINVAL;
3229
3230         cik_cp_gfx_enable(rdev, false);
3231
3232         /* PFP */
3233         fw_data = (const __be32 *)rdev->pfp_fw->data;
3234         WREG32(CP_PFP_UCODE_ADDR, 0);
3235         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3236                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3237         WREG32(CP_PFP_UCODE_ADDR, 0);
3238
3239         /* CE */
3240         fw_data = (const __be32 *)rdev->ce_fw->data;
3241         WREG32(CP_CE_UCODE_ADDR, 0);
3242         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3243                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3244         WREG32(CP_CE_UCODE_ADDR, 0);
3245
3246         /* ME */
3247         fw_data = (const __be32 *)rdev->me_fw->data;
3248         WREG32(CP_ME_RAM_WADDR, 0);
3249         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3250                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3251         WREG32(CP_ME_RAM_WADDR, 0);
3252
3253         WREG32(CP_PFP_UCODE_ADDR, 0);
3254         WREG32(CP_CE_UCODE_ADDR, 0);
3255         WREG32(CP_ME_RAM_WADDR, 0);
3256         WREG32(CP_ME_RAM_RADDR, 0);
3257         return 0;
3258 }
3259
3260 /**
3261  * cik_cp_gfx_start - start the gfx ring
3262  *
3263  * @rdev: radeon_device pointer
3264  *
3265  * Enables the ring and loads the clear state context and other
3266  * packets required to init the ring.
3267  * Returns 0 for success, error for failure.
3268  */
3269 static int cik_cp_gfx_start(struct radeon_device *rdev)
3270 {
3271         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3272         int r, i;
3273
3274         /* init the CP */
3275         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3276         WREG32(CP_ENDIAN_SWAP, 0);
3277         WREG32(CP_DEVICE_ID, 1);
3278
3279         cik_cp_gfx_enable(rdev, true);
3280
3281         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3282         if (r) {
3283                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3284                 return r;
3285         }
3286
3287         /* init the CE partitions.  CE only used for gfx on CIK */
3288         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3289         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3290         radeon_ring_write(ring, 0xc000);
3291         radeon_ring_write(ring, 0xc000);
3292
3293         /* setup clear context state */
3294         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3295         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3296
3297         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3298         radeon_ring_write(ring, 0x80000000);
3299         radeon_ring_write(ring, 0x80000000);
3300
3301         for (i = 0; i < cik_default_size; i++)
3302                 radeon_ring_write(ring, cik_default_state[i]);
3303
3304         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3305         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3306
3307         /* set clear context state */
3308         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3309         radeon_ring_write(ring, 0);
3310
3311         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3312         radeon_ring_write(ring, 0x00000316);
3313         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3314         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3315
3316         radeon_ring_unlock_commit(rdev, ring);
3317
3318         return 0;
3319 }
3320
3321 /**
3322  * cik_cp_gfx_fini - stop the gfx ring
3323  *
3324  * @rdev: radeon_device pointer
3325  *
3326  * Stop the gfx ring and tear down the driver ring
3327  * info.
3328  */
3329 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3330 {
3331         cik_cp_gfx_enable(rdev, false);
3332         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3333 }
3334
3335 /**
3336  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3337  *
3338  * @rdev: radeon_device pointer
3339  *
3340  * Program the location and size of the gfx ring buffer
3341  * and test it to make sure it's working.
3342  * Returns 0 for success, error for failure.
3343  */
3344 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3345 {
3346         struct radeon_ring *ring;
3347         u32 tmp;
3348         u32 rb_bufsz;
3349         u64 rb_addr;
3350         int r;
3351
3352         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3353         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3354
3355         /* Set the write pointer delay */
3356         WREG32(CP_RB_WPTR_DELAY, 0);
3357
3358         /* set the RB to use vmid 0 */
3359         WREG32(CP_RB_VMID, 0);
3360
3361         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3362
3363         /* ring 0 - compute and gfx */
3364         /* Set ring buffer size */
3365         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3366         rb_bufsz = drm_order(ring->ring_size / 8);
3367         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3368 #ifdef __BIG_ENDIAN
3369         tmp |= BUF_SWAP_32BIT;
3370 #endif
3371         WREG32(CP_RB0_CNTL, tmp);
3372
3373         /* Initialize the ring buffer's read and write pointers */
3374         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3375         ring->wptr = 0;
3376         WREG32(CP_RB0_WPTR, ring->wptr);
3377
3378         /* set the wb address wether it's enabled or not */
3379         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3380         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3381
3382         /* scratch register shadowing is no longer supported */
3383         WREG32(SCRATCH_UMSK, 0);
3384
3385         if (!rdev->wb.enabled)
3386                 tmp |= RB_NO_UPDATE;
3387
3388         mdelay(1);
3389         WREG32(CP_RB0_CNTL, tmp);
3390
3391         rb_addr = ring->gpu_addr >> 8;
3392         WREG32(CP_RB0_BASE, rb_addr);
3393         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3394
3395         ring->rptr = RREG32(CP_RB0_RPTR);
3396
3397         /* start the ring */
3398         cik_cp_gfx_start(rdev);
3399         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3400         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3401         if (r) {
3402                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3403                 return r;
3404         }
3405         return 0;
3406 }
3407
3408 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3409                               struct radeon_ring *ring)
3410 {
3411         u32 rptr;
3412
3413
3414
3415         if (rdev->wb.enabled) {
3416                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3417         } else {
3418                 mutex_lock(&rdev->srbm_mutex);
3419                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3420                 rptr = RREG32(CP_HQD_PQ_RPTR);
3421                 cik_srbm_select(rdev, 0, 0, 0, 0);
3422                 mutex_unlock(&rdev->srbm_mutex);
3423         }
3424
3425         return rptr;
3426 }
3427
3428 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3429                               struct radeon_ring *ring)
3430 {
3431         u32 wptr;
3432
3433         if (rdev->wb.enabled) {
3434                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3435         } else {
3436                 mutex_lock(&rdev->srbm_mutex);
3437                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3438                 wptr = RREG32(CP_HQD_PQ_WPTR);
3439                 cik_srbm_select(rdev, 0, 0, 0, 0);
3440                 mutex_unlock(&rdev->srbm_mutex);
3441         }
3442
3443         return wptr;
3444 }
3445
/* Commit a new compute ring write pointer: shadow it in the writeback
 * page first, then ring the queue's doorbell so the MEC picks up the
 * newly submitted commands.
 */
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
                               struct radeon_ring *ring)
{
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3452
3453 /**
3454  * cik_cp_compute_enable - enable/disable the compute CP MEs
3455  *
3456  * @rdev: radeon_device pointer
3457  * @enable: enable or disable the MEs
3458  *
3459  * Halts or unhalts the compute MEs.
3460  */
3461 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3462 {
3463         if (enable)
3464                 WREG32(CP_MEC_CNTL, 0);
3465         else
3466                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3467         udelay(50);
3468 }
3469
3470 /**
3471  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3472  *
3473  * @rdev: radeon_device pointer
3474  *
3475  * Loads the compute MEC1&2 ucode.
3476  * Returns 0 for success, -EINVAL if the ucode is not available.
3477  */
3478 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3479 {
3480         const __be32 *fw_data;
3481         int i;
3482
3483         if (!rdev->mec_fw)
3484                 return -EINVAL;
3485
3486         cik_cp_compute_enable(rdev, false);
3487
3488         /* MEC1 */
3489         fw_data = (const __be32 *)rdev->mec_fw->data;
3490         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3491         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3492                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3493         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3494
3495         if (rdev->family == CHIP_KAVERI) {
3496                 /* MEC2 */
3497                 fw_data = (const __be32 *)rdev->mec_fw->data;
3498                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3499                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3500                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3501                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3502         }
3503
3504         return 0;
3505 }
3506
3507 /**
3508  * cik_cp_compute_start - start the compute queues
3509  *
3510  * @rdev: radeon_device pointer
3511  *
3512  * Enable the compute queues.
3513  * Returns 0 for success, error for failure.
3514  */
3515 static int cik_cp_compute_start(struct radeon_device *rdev)
3516 {
3517         cik_cp_compute_enable(rdev, true);
3518
3519         return 0;
3520 }
3521
3522 /**
3523  * cik_cp_compute_fini - stop the compute queues
3524  *
3525  * @rdev: radeon_device pointer
3526  *
3527  * Stop the compute queues and tear down the driver queue
3528  * info.
3529  */
3530 static void cik_cp_compute_fini(struct radeon_device *rdev)
3531 {
3532         int i, idx, r;
3533
3534         cik_cp_compute_enable(rdev, false);
3535
3536         for (i = 0; i < 2; i++) {
3537                 if (i == 0)
3538                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3539                 else
3540                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3541
3542                 if (rdev->ring[idx].mqd_obj) {
3543                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3544                         if (unlikely(r != 0))
3545                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3546
3547                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3548                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3549
3550                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3551                         rdev->ring[idx].mqd_obj = NULL;
3552                 }
3553         }
3554 }
3555
3556 static void cik_mec_fini(struct radeon_device *rdev)
3557 {
3558         int r;
3559
3560         if (rdev->mec.hpd_eop_obj) {
3561                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3562                 if (unlikely(r != 0))
3563                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3564                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3565                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3566
3567                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3568                 rdev->mec.hpd_eop_obj = NULL;
3569         }
3570 }
3571
3572 #define MEC_HPD_SIZE 2048
3573
3574 static int cik_mec_init(struct radeon_device *rdev)
3575 {
3576         int r;
3577         u32 *hpd;
3578
3579         /*
3580          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3581          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3582          */
3583         if (rdev->family == CHIP_KAVERI)
3584                 rdev->mec.num_mec = 2;
3585         else
3586                 rdev->mec.num_mec = 1;
3587         rdev->mec.num_pipe = 4;
3588         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3589
3590         if (rdev->mec.hpd_eop_obj == NULL) {
3591                 r = radeon_bo_create(rdev,
3592                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3593                                      PAGE_SIZE, true,
3594                                      RADEON_GEM_DOMAIN_GTT, NULL,
3595                                      &rdev->mec.hpd_eop_obj);
3596                 if (r) {
3597                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3598                         return r;
3599                 }
3600         }
3601
3602         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3603         if (unlikely(r != 0)) {
3604                 cik_mec_fini(rdev);
3605                 return r;
3606         }
3607         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3608                           &rdev->mec.hpd_eop_gpu_addr);
3609         if (r) {
3610                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3611                 cik_mec_fini(rdev);
3612                 return r;
3613         }
3614         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3615         if (r) {
3616                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3617                 cik_mec_fini(rdev);
3618                 return r;
3619         }
3620
3621         /* clear memory.  Not sure if this is required or not */
3622         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3623
3624         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3625         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3626
3627         return 0;
3628 }
3629
/* CPU-side snapshot of the per-queue CP_HQD_* and CP_MQD_* register
 * state, embedded in struct bonaire_mqd below.  The field order
 * presumably mirrors the hardware MQD layout the MEC saves/restores
 * from memory — do not reorder without checking the hardware spec.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3668
/* Memory Queue Descriptor for Bonaire-class (CIK) compute queues.
 * One of these is allocated per compute ring in a GTT buffer object by
 * cik_cp_compute_resume(); queue_state carries the HQD register
 * snapshot programmed there.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3696
3697 /**
3698  * cik_cp_compute_resume - setup the compute queue registers
3699  *
3700  * @rdev: radeon_device pointer
3701  *
3702  * Program the compute queues and test them to make sure they
3703  * are working.
3704  * Returns 0 for success, error for failure.
3705  */
3706 static int cik_cp_compute_resume(struct radeon_device *rdev)
3707 {
3708         int r, i, idx;
3709         u32 tmp;
3710         bool use_doorbell = true;
3711         u64 hqd_gpu_addr;
3712         u64 mqd_gpu_addr;
3713         u64 eop_gpu_addr;
3714         u64 wb_gpu_addr;
3715         u32 *buf;
3716         struct bonaire_mqd *mqd;
3717
3718         r = cik_cp_compute_start(rdev);
3719         if (r)
3720                 return r;
3721
3722         /* fix up chicken bits */
3723         tmp = RREG32(CP_CPF_DEBUG);
3724         tmp |= (1 << 23);
3725         WREG32(CP_CPF_DEBUG, tmp);
3726
3727         /* init the pipes */
3728         mutex_lock(&rdev->srbm_mutex);
3729         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3730                 int me = (i < 4) ? 1 : 2;
3731                 int pipe = (i < 4) ? i : (i - 4);
3732
3733                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3734
3735                 cik_srbm_select(rdev, me, pipe, 0, 0);
3736
3737                 /* write the EOP addr */
3738                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3739                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3740
3741                 /* set the VMID assigned */
3742                 WREG32(CP_HPD_EOP_VMID, 0);
3743
3744                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3745                 tmp = RREG32(CP_HPD_EOP_CONTROL);
3746                 tmp &= ~EOP_SIZE_MASK;
3747                 tmp |= drm_order(MEC_HPD_SIZE / 8);
3748                 WREG32(CP_HPD_EOP_CONTROL, tmp);
3749         }
3750         cik_srbm_select(rdev, 0, 0, 0, 0);
3751         mutex_unlock(&rdev->srbm_mutex);
3752
3753         /* init the queues.  Just two for now. */
3754         for (i = 0; i < 2; i++) {
3755                 if (i == 0)
3756                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3757                 else
3758                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3759
3760                 if (rdev->ring[idx].mqd_obj == NULL) {
3761                         r = radeon_bo_create(rdev,
3762                                              sizeof(struct bonaire_mqd),
3763                                              PAGE_SIZE, true,
3764                                              RADEON_GEM_DOMAIN_GTT, NULL,
3765                                              &rdev->ring[idx].mqd_obj);
3766                         if (r) {
3767                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3768                                 return r;
3769                         }
3770                 }
3771
3772                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3773                 if (unlikely(r != 0)) {
3774                         cik_cp_compute_fini(rdev);
3775                         return r;
3776                 }
3777                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3778                                   &mqd_gpu_addr);
3779                 if (r) {
3780                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3781                         cik_cp_compute_fini(rdev);
3782                         return r;
3783                 }
3784                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3785                 if (r) {
3786                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3787                         cik_cp_compute_fini(rdev);
3788                         return r;
3789                 }
3790
3791                 /* doorbell offset */
3792                 rdev->ring[idx].doorbell_offset =
3793                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3794
3795                 /* init the mqd struct */
3796                 memset(buf, 0, sizeof(struct bonaire_mqd));
3797
3798                 mqd = (struct bonaire_mqd *)buf;
3799                 mqd->header = 0xC0310800;
3800                 mqd->static_thread_mgmt01[0] = 0xffffffff;
3801                 mqd->static_thread_mgmt01[1] = 0xffffffff;
3802                 mqd->static_thread_mgmt23[0] = 0xffffffff;
3803                 mqd->static_thread_mgmt23[1] = 0xffffffff;
3804
3805                 mutex_lock(&rdev->srbm_mutex);
3806                 cik_srbm_select(rdev, rdev->ring[idx].me,
3807                                 rdev->ring[idx].pipe,
3808                                 rdev->ring[idx].queue, 0);
3809
3810                 /* disable wptr polling */
3811                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3812                 tmp &= ~WPTR_POLL_EN;
3813                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3814
3815                 /* enable doorbell? */
3816                 mqd->queue_state.cp_hqd_pq_doorbell_control =
3817                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3818                 if (use_doorbell)
3819                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3820                 else
3821                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3822                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3823                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3824
3825                 /* disable the queue if it's active */
3826                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3827                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3828                 mqd->queue_state.cp_hqd_pq_wptr= 0;
3829                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3830                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3831                         for (i = 0; i < rdev->usec_timeout; i++) {
3832                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3833                                         break;
3834                                 udelay(1);
3835                         }
3836                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3837                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3838                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3839                 }
3840
3841                 /* set the pointer to the MQD */
3842                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3843                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3844                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3845                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3846                 /* set MQD vmid to 0 */
3847                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3848                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3849                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3850
3851                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3852                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3853                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3854                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3855                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3856                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3857
3858                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3859                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3860                 mqd->queue_state.cp_hqd_pq_control &=
3861                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3862
3863                 mqd->queue_state.cp_hqd_pq_control |=
3864                         drm_order(rdev->ring[idx].ring_size / 8);
3865                 mqd->queue_state.cp_hqd_pq_control |=
3866                         (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3867 #ifdef __BIG_ENDIAN
3868                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3869 #endif
3870                 mqd->queue_state.cp_hqd_pq_control &=
3871                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3872                 mqd->queue_state.cp_hqd_pq_control |=
3873                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3874                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3875
3876                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3877                 if (i == 0)
3878                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3879                 else
3880                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3881                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3882                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3883                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3884                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3885                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3886
3887                 /* set the wb address wether it's enabled or not */
3888                 if (i == 0)
3889                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3890                 else
3891                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3892                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3893                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3894                         upper_32_bits(wb_gpu_addr) & 0xffff;
3895                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3896                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3897                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3898                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3899
3900                 /* enable the doorbell if requested */
3901                 if (use_doorbell) {
3902                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3903                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3904                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3905                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3906                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3907                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3908                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3909                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3910
3911                 } else {
3912                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3913                 }
3914                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3915                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3916
3917                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3918                 rdev->ring[idx].wptr = 0;
3919                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3920                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3921                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3922                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3923
3924                 /* set the vmid for the queue */
3925                 mqd->queue_state.cp_hqd_vmid = 0;
3926                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3927
3928                 /* activate the queue */
3929                 mqd->queue_state.cp_hqd_active = 1;
3930                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3931
3932                 cik_srbm_select(rdev, 0, 0, 0, 0);
3933                 mutex_unlock(&rdev->srbm_mutex);
3934
3935                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3936                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3937
3938                 rdev->ring[idx].ready = true;
3939                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3940                 if (r)
3941                         rdev->ring[idx].ready = false;
3942         }
3943
3944         return 0;
3945 }
3946
/* Enable or halt both the gfx CP and the compute MECs together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
3952
/* Load both the gfx and the compute CP microcode images.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
3966
/* Tear down both the gfx ring and the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
3972
3973 static int cik_cp_resume(struct radeon_device *rdev)
3974 {
3975         int r;
3976
3977         /* Reset all cp blocks */
3978         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3979         RREG32(GRBM_SOFT_RESET);
3980         mdelay(15);
3981         WREG32(GRBM_SOFT_RESET, 0);
3982         RREG32(GRBM_SOFT_RESET);
3983
3984         r = cik_cp_load_microcode(rdev);
3985         if (r)
3986                 return r;
3987
3988         r = cik_cp_gfx_resume(rdev);
3989         if (r)
3990                 return r;
3991         r = cik_cp_compute_resume(rdev);
3992         if (r)
3993                 return r;
3994
3995         return 0;
3996 }
3997
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log for
 * post-mortem debugging; called from the GPU soft-reset path.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4037
4038 /**
4039  * cik_gpu_check_soft_reset - check which blocks are busy
4040  *
4041  * @rdev: radeon_device pointer
4042  *
4043  * Check which blocks are busy and return the relevant reset
4044  * mask to be used by cik_gpu_soft_reset().
4045  * Returns a mask of the blocks to be reset.
4046  */
4047 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4048 {
4049         u32 reset_mask = 0;
4050         u32 tmp;
4051
4052         /* GRBM_STATUS */
4053         tmp = RREG32(GRBM_STATUS);
4054         if (tmp & (PA_BUSY | SC_BUSY |
4055                    BCI_BUSY | SX_BUSY |
4056                    TA_BUSY | VGT_BUSY |
4057                    DB_BUSY | CB_BUSY |
4058                    GDS_BUSY | SPI_BUSY |
4059                    IA_BUSY | IA_BUSY_NO_DMA))
4060                 reset_mask |= RADEON_RESET_GFX;
4061
4062         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4063                 reset_mask |= RADEON_RESET_CP;
4064
4065         /* GRBM_STATUS2 */
4066         tmp = RREG32(GRBM_STATUS2);
4067         if (tmp & RLC_BUSY)
4068                 reset_mask |= RADEON_RESET_RLC;
4069
4070         /* SDMA0_STATUS_REG */
4071         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4072         if (!(tmp & SDMA_IDLE))
4073                 reset_mask |= RADEON_RESET_DMA;
4074
4075         /* SDMA1_STATUS_REG */
4076         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4077         if (!(tmp & SDMA_IDLE))
4078                 reset_mask |= RADEON_RESET_DMA1;
4079
4080         /* SRBM_STATUS2 */
4081         tmp = RREG32(SRBM_STATUS2);
4082         if (tmp & SDMA_BUSY)
4083                 reset_mask |= RADEON_RESET_DMA;
4084
4085         if (tmp & SDMA1_BUSY)
4086                 reset_mask |= RADEON_RESET_DMA1;
4087
4088         /* SRBM_STATUS */
4089         tmp = RREG32(SRBM_STATUS);
4090
4091         if (tmp & IH_BUSY)
4092                 reset_mask |= RADEON_RESET_IH;
4093
4094         if (tmp & SEM_BUSY)
4095                 reset_mask |= RADEON_RESET_SEM;
4096
4097         if (tmp & GRBM_RQ_PENDING)
4098                 reset_mask |= RADEON_RESET_GRBM;
4099
4100         if (tmp & VMC_BUSY)
4101                 reset_mask |= RADEON_RESET_VMC;
4102
4103         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4104                    MCC_BUSY | MCD_BUSY))
4105                 reset_mask |= RADEON_RESET_MC;
4106
4107         if (evergreen_is_display_hung(rdev))
4108                 reset_mask |= RADEON_RESET_DISPLAY;
4109
4110         /* Skip MC reset as it's mostly likely not hung, just busy */
4111         if (reset_mask & RADEON_RESET_MC) {
4112                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4113                 reset_mask &= ~RADEON_RESET_MC;
4114         }
4115
4116         return reset_mask;
4117 }
4118
4119 /**
4120  * cik_gpu_soft_reset - soft reset GPU
4121  *
4122  * @rdev: radeon_device pointer
4123  * @reset_mask: mask of which blocks to reset
4124  *
4125  * Soft reset the blocks specified in @reset_mask.
4126  */
4127 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4128 {
4129         struct evergreen_mc_save save;
4130         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4131         u32 tmp;
4132
4133         if (reset_mask == 0)
4134                 return;
4135
4136         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4137
4138         cik_print_gpu_status_regs(rdev);
4139         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4140                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4141         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4142                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4143
4144         /* stop the rlc */
4145         cik_rlc_stop(rdev);
4146
4147         /* Disable GFX parsing/prefetching */
4148         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4149
4150         /* Disable MEC parsing/prefetching */
4151         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4152
4153         if (reset_mask & RADEON_RESET_DMA) {
4154                 /* sdma0 */
4155                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4156                 tmp |= SDMA_HALT;
4157                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4158         }
4159         if (reset_mask & RADEON_RESET_DMA1) {
4160                 /* sdma1 */
4161                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4162                 tmp |= SDMA_HALT;
4163                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4164         }
4165
4166         evergreen_mc_stop(rdev, &save);
4167         if (evergreen_mc_wait_for_idle(rdev)) {
4168                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4169         }
4170
4171         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4172                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4173
4174         if (reset_mask & RADEON_RESET_CP) {
4175                 grbm_soft_reset |= SOFT_RESET_CP;
4176
4177                 srbm_soft_reset |= SOFT_RESET_GRBM;
4178         }
4179
4180         if (reset_mask & RADEON_RESET_DMA)
4181                 srbm_soft_reset |= SOFT_RESET_SDMA;
4182
4183         if (reset_mask & RADEON_RESET_DMA1)
4184                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4185
4186         if (reset_mask & RADEON_RESET_DISPLAY)
4187                 srbm_soft_reset |= SOFT_RESET_DC;
4188
4189         if (reset_mask & RADEON_RESET_RLC)
4190                 grbm_soft_reset |= SOFT_RESET_RLC;
4191
4192         if (reset_mask & RADEON_RESET_SEM)
4193                 srbm_soft_reset |= SOFT_RESET_SEM;
4194
4195         if (reset_mask & RADEON_RESET_IH)
4196                 srbm_soft_reset |= SOFT_RESET_IH;
4197
4198         if (reset_mask & RADEON_RESET_GRBM)
4199                 srbm_soft_reset |= SOFT_RESET_GRBM;
4200
4201         if (reset_mask & RADEON_RESET_VMC)
4202                 srbm_soft_reset |= SOFT_RESET_VMC;
4203
4204         if (!(rdev->flags & RADEON_IS_IGP)) {
4205                 if (reset_mask & RADEON_RESET_MC)
4206                         srbm_soft_reset |= SOFT_RESET_MC;
4207         }
4208
4209         if (grbm_soft_reset) {
4210                 tmp = RREG32(GRBM_SOFT_RESET);
4211                 tmp |= grbm_soft_reset;
4212                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4213                 WREG32(GRBM_SOFT_RESET, tmp);
4214                 tmp = RREG32(GRBM_SOFT_RESET);
4215
4216                 udelay(50);
4217
4218                 tmp &= ~grbm_soft_reset;
4219                 WREG32(GRBM_SOFT_RESET, tmp);
4220                 tmp = RREG32(GRBM_SOFT_RESET);
4221         }
4222
4223         if (srbm_soft_reset) {
4224                 tmp = RREG32(SRBM_SOFT_RESET);
4225                 tmp |= srbm_soft_reset;
4226                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4227                 WREG32(SRBM_SOFT_RESET, tmp);
4228                 tmp = RREG32(SRBM_SOFT_RESET);
4229
4230                 udelay(50);
4231
4232                 tmp &= ~srbm_soft_reset;
4233                 WREG32(SRBM_SOFT_RESET, tmp);
4234                 tmp = RREG32(SRBM_SOFT_RESET);
4235         }
4236
4237         /* Wait a little for things to settle down */
4238         udelay(50);
4239
4240         evergreen_mc_resume(rdev, &save);
4241         udelay(50);
4242
4243         cik_print_gpu_status_regs(rdev);
4244 }
4245
4246 /**
4247  * cik_asic_reset - soft reset GPU
4248  *
4249  * @rdev: radeon_device pointer
4250  *
4251  * Look up which blocks are hung and attempt
4252  * to reset them.
4253  * Returns 0 for success.
4254  */
4255 int cik_asic_reset(struct radeon_device *rdev)
4256 {
4257         u32 reset_mask;
4258
4259         reset_mask = cik_gpu_check_soft_reset(rdev);
4260
4261         if (reset_mask)
4262                 r600_set_bios_scratch_engine_hung(rdev, true);
4263
4264         cik_gpu_soft_reset(rdev, reset_mask);
4265
4266         reset_mask = cik_gpu_check_soft_reset(rdev);
4267
4268         if (!reset_mask)
4269                 r600_set_bios_scratch_engine_hung(rdev, false);
4270
4271         return 0;
4272 }
4273
4274 /**
4275  * cik_gfx_is_lockup - check if the 3D engine is locked up
4276  *
4277  * @rdev: radeon_device pointer
4278  * @ring: radeon_ring structure holding ring information
4279  *
4280  * Check if the 3D engine is locked up (CIK).
4281  * Returns true if the engine is locked, false if not.
4282  */
4283 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4284 {
4285         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4286
4287         if (!(reset_mask & (RADEON_RESET_GFX |
4288                             RADEON_RESET_COMPUTE |
4289                             RADEON_RESET_CP))) {
4290                 radeon_ring_lockup_update(ring);
4291                 return false;
4292         }
4293         /* force CP activities */
4294         radeon_ring_force_activity(rdev, ring);
4295         return radeon_ring_test_lockup(rdev, ring);
4296 }
4297
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* halt MC clients while the aperture registers are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end (>>24, i.e. 16MB units) in the high halfword,
	 * start in the low halfword */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture: base 0, top == bottom (effectively disabled) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
4354
4355 /**
4356  * cik_mc_init - initialize the memory controller driver params
4357  *
4358  * @rdev: radeon_device pointer
4359  *
4360  * Look up the amount of vram, vram width, and decide how to place
4361  * vram and gart within the GPU's physical address space (CIK).
4362  * Returns 0 for success.
4363  */
4364 static int cik_mc_init(struct radeon_device *rdev)
4365 {
4366         u32 tmp;
4367         int chansize, numchan;
4368
4369         /* Get VRAM informations */
4370         rdev->mc.vram_is_ddr = true;
4371         tmp = RREG32(MC_ARB_RAMCFG);
4372         if (tmp & CHANSIZE_MASK) {
4373                 chansize = 64;
4374         } else {
4375                 chansize = 32;
4376         }
4377         tmp = RREG32(MC_SHARED_CHMAP);
4378         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4379         case 0:
4380         default:
4381                 numchan = 1;
4382                 break;
4383         case 1:
4384                 numchan = 2;
4385                 break;
4386         case 2:
4387                 numchan = 4;
4388                 break;
4389         case 3:
4390                 numchan = 8;
4391                 break;
4392         case 4:
4393                 numchan = 3;
4394                 break;
4395         case 5:
4396                 numchan = 6;
4397                 break;
4398         case 6:
4399                 numchan = 10;
4400                 break;
4401         case 7:
4402                 numchan = 12;
4403                 break;
4404         case 8:
4405                 numchan = 16;
4406                 break;
4407         }
4408         rdev->mc.vram_width = numchan * chansize;
4409         /* Could aper size report 0 ? */
4410         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4411         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4412         /* size in MB on si */
4413         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4414         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4415         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4416         si_vram_gtt_location(rdev, &rdev->mc);
4417         radeon_update_bandwidth_info(rdev);
4418
4419         return 0;
4420 }
4421
4422 /*
4423  * GART
4424  * VMID 0 is the physical GPU addresses as used by the kernel.
4425  * VMIDs 1-15 are used for userspace clients and are handled
4426  * by the radeon vm/hsa code.
4427  */
4428 /**
4429  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4430  *
4431  * @rdev: radeon_device pointer
4432  *
4433  * Flush the TLB for the VMID 0 page table (CIK).
4434  */
4435 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4436 {
4437         /* flush hdp cache */
4438         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4439
4440         /* bits 0-15 are the VM contexts0-15 */
4441         WREG32(VM_INVALIDATE_REQUEST, 0x1);
4442 }
4443
4444 /**
4445  * cik_pcie_gart_enable - gart enable
4446  *
4447  * @rdev: radeon_device pointer
4448  *
4449  * This sets up the TLBs, programs the page tables for VMID0,
4450  * sets up the hw for VMIDs 1-15 which are allocated on
4451  * demand, and sets up the global locations for the LDS, GDS,
4452  * and GPUVM for FSA64 clients (CIK).
4453  * Returns 0 for success, errors for failure.
4454  */
4455 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4456 {
4457         int r, i;
4458
4459         if (rdev->gart.robj == NULL) {
4460                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4461                 return -EINVAL;
4462         }
4463         r = radeon_gart_table_vram_pin(rdev);
4464         if (r)
4465                 return r;
4466         radeon_gart_restore(rdev);
4467         /* Setup TLB control */
4468         WREG32(MC_VM_MX_L1_TLB_CNTL,
4469                (0xA << 7) |
4470                ENABLE_L1_TLB |
4471                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4472                ENABLE_ADVANCED_DRIVER_MODEL |
4473                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4474         /* Setup L2 cache */
4475         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4476                ENABLE_L2_FRAGMENT_PROCESSING |
4477                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4478                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4479                EFFECTIVE_L2_QUEUE_SIZE(7) |
4480                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4481         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4482         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4483                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4484         /* setup context0 */
4485         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4486         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4487         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4488         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4489                         (u32)(rdev->dummy_page.addr >> 12));
4490         WREG32(VM_CONTEXT0_CNTL2, 0);
4491         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4492                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4493
4494         WREG32(0x15D4, 0);
4495         WREG32(0x15D8, 0);
4496         WREG32(0x15DC, 0);
4497
4498         /* empty context1-15 */
4499         /* FIXME start with 4G, once using 2 level pt switch to full
4500          * vm size space
4501          */
4502         /* set vm size, must be a multiple of 4 */
4503         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4504         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4505         for (i = 1; i < 16; i++) {
4506                 if (i < 8)
4507                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4508                                rdev->gart.table_addr >> 12);
4509                 else
4510                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4511                                rdev->gart.table_addr >> 12);
4512         }
4513
4514         /* enable context1-15 */
4515         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4516                (u32)(rdev->dummy_page.addr >> 12));
4517         WREG32(VM_CONTEXT1_CNTL2, 4);
4518         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4519                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4520                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4521                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4522                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4523                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4524                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4525                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4526                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4527                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4528                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4529                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4530                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4531
4532         /* TC cache setup ??? */
4533         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4534         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4535         WREG32(TC_CFG_L1_STORE_POLICY, 0);
4536
4537         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4538         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4539         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4540         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4541         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4542
4543         WREG32(TC_CFG_L1_VOLATILE, 0);
4544         WREG32(TC_CFG_L2_VOLATILE, 0);
4545
4546         if (rdev->family == CHIP_KAVERI) {
4547                 u32 tmp = RREG32(CHUB_CONTROL);
4548                 tmp &= ~BYPASS_VM;
4549                 WREG32(CHUB_CONTROL, tmp);
4550         }
4551
4552         /* XXX SH_MEM regs */
4553         /* where to put LDS, scratch, GPUVM in FSA64 space */
4554         mutex_lock(&rdev->srbm_mutex);
4555         for (i = 0; i < 16; i++) {
4556                 cik_srbm_select(rdev, 0, 0, 0, i);
4557                 /* CP and shaders */
4558                 WREG32(SH_MEM_CONFIG, 0);
4559                 WREG32(SH_MEM_APE1_BASE, 1);
4560                 WREG32(SH_MEM_APE1_LIMIT, 0);
4561                 WREG32(SH_MEM_BASES, 0);
4562                 /* SDMA GFX */
4563                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4564                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4565                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4566                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4567                 /* XXX SDMA RLC - todo */
4568         }
4569         cik_srbm_select(rdev, 0, 0, 0, 0);
4570         mutex_unlock(&rdev->srbm_mutex);
4571
4572         cik_pcie_gart_tlb_flush(rdev);
4573         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4574                  (unsigned)(rdev->mc.gtt_size >> 20),
4575                  (unsigned long long)rdev->gart.table_addr);
4576         rdev->gart.ready = true;
4577         return 0;
4578 }
4579
4580 /**
4581  * cik_pcie_gart_disable - gart disable
4582  *
4583  * @rdev: radeon_device pointer
4584  *
4585  * This disables all VM page table (CIK).
4586  */
4587 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4588 {
4589         /* Disable all tables */
4590         WREG32(VM_CONTEXT0_CNTL, 0);
4591         WREG32(VM_CONTEXT1_CNTL, 0);
4592         /* Setup TLB control */
4593         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4594                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4595         /* Setup L2 cache */
4596         WREG32(VM_L2_CNTL,
4597                ENABLE_L2_FRAGMENT_PROCESSING |
4598                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4599                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4600                EFFECTIVE_L2_QUEUE_SIZE(7) |
4601                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4602         WREG32(VM_L2_CNTL2, 0);
4603         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4604                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4605         radeon_gart_table_vram_unpin(rdev);
4606 }
4607
4608 /**
4609  * cik_pcie_gart_fini - vm fini callback
4610  *
4611  * @rdev: radeon_device pointer
4612  *
4613  * Tears down the driver GART/VM setup (CIK).
4614  */
4615 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4616 {
4617         cik_pcie_gart_disable(rdev);
4618         radeon_gart_table_vram_free(rdev);
4619         radeon_gart_fini(rdev);
4620 }
4621
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
4635
4636 /*
4637  * vm
4638  * VMID 0 is the physical GPU addresses as used by the kernel.
4639  * VMIDs 1-15 are used for userspace clients and are handled
4640  * by the radeon vm/hsa code.
4641  */
4642 /**
4643  * cik_vm_init - cik vm init callback
4644  *
4645  * @rdev: radeon_device pointer
4646  *
4647  * Inits cik specific vm parameters (number of VMs, base of vram for
4648  * VMIDs 1-15) (CIK).
4649  * Returns 0 for success.
4650  */
4651 int cik_vm_init(struct radeon_device *rdev)
4652 {
4653         /* number of VMs */
4654         rdev->vm_manager.nvm = 16;
4655         /* base offset of vram pages */
4656         if (rdev->flags & RADEON_IS_IGP) {
4657                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4658                 tmp <<= 22;
4659                 rdev->vm_manager.vram_base_offset = tmp;
4660         } else
4661                 rdev->vm_manager.vram_base_offset = 0;
4662
4663         return 0;
4664 }
4665
4666 /**
4667  * cik_vm_fini - cik vm fini callback
4668  *
4669  * @rdev: radeon_device pointer
4670  *
4671  * Tear down any asic specific VM setup (CIK).
4672  */
4673 void cik_vm_fini(struct radeon_device *rdev)
4674 {
4675 }
4676
4677 /**
4678  * cik_vm_decode_fault - print human readable fault info
4679  *
4680  * @rdev: radeon_device pointer
4681  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4682  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4683  *
4684  * Print human readable fault information (CIK).
4685  */
4686 static void cik_vm_decode_fault(struct radeon_device *rdev,
4687                                 u32 status, u32 addr, u32 mc_client)
4688 {
4689         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4690         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4691         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4692         char *block = (char *)&mc_client;
4693
4694         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4695                protections, vmid, addr,
4696                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4697                block, mc_id);
4698 }
4699
4700 /**
4701  * cik_vm_flush - cik vm flush using the CP
4702  *
4703  * @rdev: radeon_device pointer
4704  *
4705  * Update the page table base and flush the VM TLB
4706  * using the CP (CIK).
4707  */
4708 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4709 {
4710         struct radeon_ring *ring = &rdev->ring[ridx];
4711
4712         if (vm == NULL)
4713                 return;
4714
4715         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4716         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4717                                  WRITE_DATA_DST_SEL(0)));
4718         if (vm->id < 8) {
4719                 radeon_ring_write(ring,
4720                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4721         } else {
4722                 radeon_ring_write(ring,
4723                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4724         }
4725         radeon_ring_write(ring, 0);
4726         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4727
4728         /* update SH_MEM_* regs */
4729         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4730         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4731                                  WRITE_DATA_DST_SEL(0)));
4732         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4733         radeon_ring_write(ring, 0);
4734         radeon_ring_write(ring, VMID(vm->id));
4735
4736         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4737         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4738                                  WRITE_DATA_DST_SEL(0)));
4739         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4740         radeon_ring_write(ring, 0);
4741
4742         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4743         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4744         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4745         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4746
4747         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4748         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4749                                  WRITE_DATA_DST_SEL(0)));
4750         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4751         radeon_ring_write(ring, 0);
4752         radeon_ring_write(ring, VMID(0));
4753
4754         /* HDP flush */
4755         /* We should be using the WAIT_REG_MEM packet here like in
4756          * cik_fence_ring_emit(), but it causes the CP to hang in this
4757          * context...
4758          */
4759         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4760         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4761                                  WRITE_DATA_DST_SEL(0)));
4762         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4763         radeon_ring_write(ring, 0);
4764         radeon_ring_write(ring, 0);
4765
4766         /* bits 0-15 are the VM contexts0-15 */
4767         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4768         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4769                                  WRITE_DATA_DST_SEL(0)));
4770         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4771         radeon_ring_write(ring, 0);
4772         radeon_ring_write(ring, 1 << vm->id);
4773
4774         /* compute doesn't have PFP */
4775         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4776                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4777                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4778                 radeon_ring_write(ring, 0x0);
4779         }
4780 }
4781
4782 /**
4783  * cik_vm_set_page - update the page tables using sDMA
4784  *
4785  * @rdev: radeon_device pointer
4786  * @ib: indirect buffer to fill with commands
4787  * @pe: addr of the page entry
4788  * @addr: dst addr to write into pe
4789  * @count: number of page entries to update
4790  * @incr: increase next addr by incr bytes
4791  * @flags: access flags
4792  *
4793  * Update the page tables using CP or sDMA (CIK).
4794  */
4795 void cik_vm_set_page(struct radeon_device *rdev,
4796                      struct radeon_ib *ib,
4797                      uint64_t pe,
4798                      uint64_t addr, unsigned count,
4799                      uint32_t incr, uint32_t flags)
4800 {
4801         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4802         uint64_t value;
4803         unsigned ndw;
4804
4805         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4806                 /* CP */
4807                 while (count) {
4808                         ndw = 2 + count * 2;
4809                         if (ndw > 0x3FFE)
4810                                 ndw = 0x3FFE;
4811
4812                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4813                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4814                                                     WRITE_DATA_DST_SEL(1));
4815                         ib->ptr[ib->length_dw++] = pe;
4816                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4817                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4818                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4819                                         value = radeon_vm_map_gart(rdev, addr);
4820                                         value &= 0xFFFFFFFFFFFFF000ULL;
4821                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4822                                         value = addr;
4823                                 } else {
4824                                         value = 0;
4825                                 }
4826                                 addr += incr;
4827                                 value |= r600_flags;
4828                                 ib->ptr[ib->length_dw++] = value;
4829                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4830                         }
4831                 }
4832         } else {
4833                 /* DMA */
4834                 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4835         }
4836 }
4837
4838 /*
4839  * RLC
4840  * The RLC is a multi-purpose microengine that handles a
4841  * variety of functions, the most important of which is
4842  * the interrupt controller.
4843  */
4844 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4845                                           bool enable)
4846 {
4847         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4848
4849         if (enable)
4850                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4851         else
4852                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4853         WREG32(CP_INT_CNTL_RING0, tmp);
4854 }
4855
4856 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4857 {
4858         u32 tmp;
4859
4860         tmp = RREG32(RLC_LB_CNTL);
4861         if (enable)
4862                 tmp |= LOAD_BALANCE_ENABLE;
4863         else
4864                 tmp &= ~LOAD_BALANCE_ENABLE;
4865         WREG32(RLC_LB_CNTL, tmp);
4866 }
4867
/* Poll the RLC serdes busy registers until the per-CU masters (for each
 * SE/SH combination) and the non-CU masters report idle; each poll loop
 * is bounded by rdev->usec_timeout. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SEs/SHs) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
4892
4893 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4894 {
4895         u32 tmp;
4896
4897         tmp = RREG32(RLC_CNTL);
4898         if (tmp != rlc)
4899                 WREG32(RLC_CNTL, rlc);
4900 }
4901
/* Disable the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later (e.g. via cik_update_rlc()). */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded by usec_timeout) for the GPM to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
4925
/* Request RLC safe mode: write the ENTER message (with REQ set) to
 * RLC_GPR_REG2, wait for GFX power/clock status bits, then wait for
 * the RLC to clear REQ (the handshake ack).  Both waits are bounded
 * by rdev->usec_timeout; a timeout falls through silently. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
4946
4947 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4948 {
4949         u32 tmp;
4950
4951         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4952         WREG32(RLC_GPR_REG2, tmp);
4953 }
4954
4955 /**
4956  * cik_rlc_stop - stop the RLC ME
4957  *
4958  * @rdev: radeon_device pointer
4959  *
4960  * Halt the RLC ME (MicroEngine) (CIK).
4961  */
4962 static void cik_rlc_stop(struct radeon_device *rdev)
4963 {
4964         WREG32(RLC_CNTL, 0);
4965
4966         cik_enable_gui_idle_interrupt(rdev, false);
4967
4968         cik_wait_for_rlc_serdes(rdev);
4969 }
4970
4971 /**
4972  * cik_rlc_start - start the RLC ME
4973  *
4974  * @rdev: radeon_device pointer
4975  *
4976  * Unhalt the RLC ME (MicroEngine) (CIK).
4977  */
4978 static void cik_rlc_start(struct radeon_device *rdev)
4979 {
4980         WREG32(RLC_CNTL, RLC_ENABLE);
4981
4982         cik_enable_gui_idle_interrupt(rdev, true);
4983
4984         udelay(50);
4985 }
4986
4987 /**
4988  * cik_rlc_resume - setup the RLC hw
4989  *
4990  * @rdev: radeon_device pointer
4991  *
4992  * Initialize the RLC registers, load the ucode,
4993  * and start the RLC (CIK).
4994  * Returns 0 for success, -EINVAL if the ucode is not available.
4995  */
4996 static int cik_rlc_resume(struct radeon_device *rdev)
4997 {
4998         u32 i, size, tmp;
4999         const __be32 *fw_data;
5000
5001         if (!rdev->rlc_fw)
5002                 return -EINVAL;
5003
5004         switch (rdev->family) {
5005         case CHIP_BONAIRE:
5006         default:
5007                 size = BONAIRE_RLC_UCODE_SIZE;
5008                 break;
5009         case CHIP_KAVERI:
5010                 size = KV_RLC_UCODE_SIZE;
5011                 break;
5012         case CHIP_KABINI:
5013                 size = KB_RLC_UCODE_SIZE;
5014                 break;
5015         }
5016
5017         cik_rlc_stop(rdev);
5018
5019         /* disable CG */
5020         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5021         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5022
5023         si_rlc_reset(rdev);
5024
5025         cik_init_pg(rdev);
5026
5027         cik_init_cg(rdev);
5028
5029         WREG32(RLC_LB_CNTR_INIT, 0);
5030         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5031
5032         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5033         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5034         WREG32(RLC_LB_PARAMS, 0x00600408);
5035         WREG32(RLC_LB_CNTL, 0x80000004);
5036
5037         WREG32(RLC_MC_CNTL, 0);
5038         WREG32(RLC_UCODE_CNTL, 0);
5039
5040         fw_data = (const __be32 *)rdev->rlc_fw->data;
5041                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5042         for (i = 0; i < size; i++)
5043                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5044         WREG32(RLC_GPM_UCODE_ADDR, 0);
5045
5046         /* XXX - find out what chips support lbpw */
5047         cik_enable_lbpw(rdev, false);
5048
5049         if (rdev->family == CHIP_BONAIRE)
5050                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5051
5052         cik_rlc_start(rdev);
5053
5054         return 0;
5055 }
5056
/* Enable/disable coarse grain clock gating (CGCG) and
 * coarse grain light sleep (CGLS) via the RLC serdes interface.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	cik_enable_gui_idle_interrupt(rdev, enable);

	if (enable) {
		/* park the RLC before touching the serdes controls */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restart the RLC with the state saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		/* NOTE(review): reads are discarded; presumably dummy reads
		 * to settle the CB clock before disabling CGCG -- confirm
		 * against the CIK CG programming sequence. */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write the control reg back if something changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5090
/* Enable/disable medium grain clock gating (MGCG) for the GFX block. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable) {
		/* enable CP memory light sleep */
		orig = data = RREG32(CP_MEM_SLP_CNTL);
		data |= CP_MEM_LS_EN;
		if (orig != data)
			WREG32(CP_MEM_SLP_CNTL, data);

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* park the RLC while programming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		/* configure the CGTS shader clock monitor */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data &= ~SM_MODE_MASK;
		data |= SM_MODE(0x2);
		data |= SM_MODE_ENABLE;
		data &= ~CGTS_OVERRIDE;
		data &= ~CGTS_LS_OVERRIDE;
		data &= ~ON_MONITOR_ADD_MASK;
		data |= ON_MONITOR_ADD_EN;
		data |= ON_MONITOR_ADD(0x96);
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);
	} else {
		/* set the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* disable CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force the CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5161
/* MC/VM clock gating control registers walked by the
 * cik_enable_mc_ls()/cik_enable_mc_mgcg() helpers below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5174
5175 static void cik_enable_mc_ls(struct radeon_device *rdev,
5176                              bool enable)
5177 {
5178         int i;
5179         u32 orig, data;
5180
5181         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5182                 orig = data = RREG32(mc_cg_registers[i]);
5183                 if (enable)
5184                         data |= MC_LS_ENABLE;
5185                 else
5186                         data &= ~MC_LS_ENABLE;
5187                 if (data != orig)
5188                         WREG32(mc_cg_registers[i], data);
5189         }
5190 }
5191
5192 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5193                                bool enable)
5194 {
5195         int i;
5196         u32 orig, data;
5197
5198         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5199                 orig = data = RREG32(mc_cg_registers[i]);
5200                 if (enable)
5201                         data |= MC_CG_ENABLE;
5202                 else
5203                         data &= ~MC_CG_ENABLE;
5204                 if (data != orig)
5205                         WREG32(mc_cg_registers[i], data);
5206         }
5207 }
5208
5209 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5210                                  bool enable)
5211 {
5212         u32 orig, data;
5213
5214         if (enable) {
5215                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5216                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5217         } else {
5218                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5219                 data |= 0xff000000;
5220                 if (data != orig)
5221                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5222
5223                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5224                 data |= 0xff000000;
5225                 if (data != orig)
5226                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5227         }
5228 }
5229
5230 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5231                                  bool enable)
5232 {
5233         u32 orig, data;
5234
5235         if (enable) {
5236                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5237                 data |= 0x100;
5238                 if (orig != data)
5239                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5240
5241                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5242                 data |= 0x100;
5243                 if (orig != data)
5244                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5245         } else {
5246                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5247                 data &= ~0x100;
5248                 if (orig != data)
5249                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5250
5251                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5252                 data &= ~0x100;
5253                 if (orig != data)
5254                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5255         }
5256 }
5257
/* Enable/disable medium grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded -- the
		 * register is unconditionally set to 0xfff.  Possibly "|="
		 * was intended; confirm against the UVD CG sequence before
		 * changing. */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
5283
5284 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5285                                 bool enable)
5286 {
5287         u32 orig, data;
5288
5289         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5290
5291         if (enable)
5292                 data &= ~CLOCK_GATING_DIS;
5293         else
5294                 data |= CLOCK_GATING_DIS;
5295
5296         if (orig != data)
5297                 WREG32(HDP_HOST_PATH_CNTL, data);
5298 }
5299
5300 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5301                               bool enable)
5302 {
5303         u32 orig, data;
5304
5305         orig = data = RREG32(HDP_MEM_POWER_LS);
5306
5307         if (enable)
5308                 data |= HDP_LS_ENABLE;
5309         else
5310                 data &= ~HDP_LS_ENABLE;
5311
5312         if (orig != data)
5313                 WREG32(HDP_MEM_POWER_LS, data);
5314 }
5315
/* Enable/disable clock gating for the blocks selected in @block.
 * GFX ordering is significant: MGCG must be enabled before CGCG,
 * and CGCG disabled before MGCG.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* APUs have no dedicated MC; skip MC gating on IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5352
/* Set up the default clock gating state: GFX CG currently left
 * disabled (see XXX), everything else enabled.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
5366
5367 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5368                                           bool enable)
5369 {
5370         u32 data, orig;
5371
5372         orig = data = RREG32(RLC_PG_CNTL);
5373         if (enable)
5374                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5375         else
5376                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5377         if (orig != data)
5378                 WREG32(RLC_PG_CNTL, data);
5379 }
5380
5381 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5382                                           bool enable)
5383 {
5384         u32 data, orig;
5385
5386         orig = data = RREG32(RLC_PG_CNTL);
5387         if (enable)
5388                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5389         else
5390                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5391         if (orig != data)
5392                 WREG32(RLC_PG_CNTL, data);
5393 }
5394
5395 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5396 {
5397         u32 data, orig;
5398
5399         orig = data = RREG32(RLC_PG_CNTL);
5400         if (enable)
5401                 data &= ~DISABLE_CP_PG;
5402         else
5403                 data |= DISABLE_CP_PG;
5404         if (orig != data)
5405                 WREG32(RLC_PG_CNTL, data);
5406 }
5407
5408 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5409 {
5410         u32 data, orig;
5411
5412         orig = data = RREG32(RLC_PG_CNTL);
5413         if (enable)
5414                 data &= ~DISABLE_GDS_PG;
5415         else
5416                 data |= DISABLE_GDS_PG;
5417         if (orig != data)
5418                 WREG32(RLC_PG_CNTL, data);
5419 }
5420
5421 #define CP_ME_TABLE_SIZE    96
5422 #define CP_ME_TABLE_OFFSET  2048
5423 #define CP_MEC_TABLE_OFFSET 4096
5424
/* Copy the register restore tables embedded in the CP firmware images
 * into the RLC cp_table buffer, one CP_ME_TABLE_SIZE-dword table per
 * microengine.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	/* KV carries one more microengine than the other CIK parts */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* me 0 = CE, 1 = PFP, 2 = ME, 3+ = MEC */
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		/* firmware data is big endian; the table buffer is CPU order */
		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
5462
/* Enable/disable GFX powergating and the RLC auto-powergate control. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): read result is discarded -- presumably a
		 * dummy read to kick the GFX block out of powergate before
		 * returning; confirm before removing. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
5492
5493 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5494 {
5495         u32 mask = 0, tmp, tmp1;
5496         int i;
5497
5498         cik_select_se_sh(rdev, se, sh);
5499         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5500         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5501         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5502
5503         tmp &= 0xffff0000;
5504
5505         tmp |= tmp1;
5506         tmp >>= 16;
5507
5508         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5509                 mask <<= 1;
5510                 mask |= 1;
5511         }
5512
5513         return (~tmp) & mask;
5514 }
5515
5516 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5517 {
5518         u32 i, j, k, active_cu_number = 0;
5519         u32 mask, counter, cu_bitmap;
5520         u32 tmp = 0;
5521
5522         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5523                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5524                         mask = 1;
5525                         cu_bitmap = 0;
5526                         counter = 0;
5527                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5528                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5529                                         if (counter < 2)
5530                                                 cu_bitmap |= mask;
5531                                         counter ++;
5532                                 }
5533                                 mask <<= 1;
5534                         }
5535
5536                         active_cu_number += counter;
5537                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5538                 }
5539         }
5540
5541         WREG32(RLC_PG_AO_CU_MASK, tmp);
5542
5543         tmp = RREG32(RLC_MAX_PG_CU);
5544         tmp &= ~MAX_PU_CU_MASK;
5545         tmp |= MAX_PU_CU(active_cu_number);
5546         WREG32(RLC_MAX_PG_CU, tmp);
5547 }
5548
5549 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5550                                        bool enable)
5551 {
5552         u32 data, orig;
5553
5554         orig = data = RREG32(RLC_PG_CNTL);
5555         if (enable)
5556                 data |= STATIC_PER_CU_PG_ENABLE;
5557         else
5558                 data &= ~STATIC_PER_CU_PG_ENABLE;
5559         if (orig != data)
5560                 WREG32(RLC_PG_CNTL, data);
5561 }
5562
5563 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5564                                         bool enable)
5565 {
5566         u32 data, orig;
5567
5568         orig = data = RREG32(RLC_PG_CNTL);
5569         if (enable)
5570                 data |= DYN_PER_CU_PG_ENABLE;
5571         else
5572                 data &= ~DYN_PER_CU_PG_ENABLE;
5573         if (orig != data)
5574                 WREG32(RLC_PG_CNTL, data);
5575 }
5576
5577 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5578 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
5579
/* Program the RLC GPM scratch area (clear state descriptor and
 * save/restore register list) and the powergating timing controls.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state; zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
5628
5629 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5630 {
5631         bool has_pg = false;
5632         bool has_dyn_mgpg = false;
5633         bool has_static_mgpg = false;
5634
5635         /* only APUs have PG */
5636         if (rdev->flags & RADEON_IS_IGP) {
5637                 has_pg = true;
5638                 has_static_mgpg = true;
5639                 if (rdev->family == CHIP_KAVERI)
5640                         has_dyn_mgpg = true;
5641         }
5642
5643         if (has_pg) {
5644                 cik_enable_gfx_cgpg(rdev, enable);
5645                 if (enable) {
5646                         cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
5647                         cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
5648                 } else {
5649                         cik_enable_gfx_static_mgpg(rdev, false);
5650                         cik_enable_gfx_dynamic_mgpg(rdev, false);
5651                 }
5652         }
5653
5654 }
5655
/* Set up powergating.  Currently a no-op everywhere: the APU path is
 * deliberately disabled (has_pg stays false; see the XXX below).
 */
void cik_init_pg(struct radeon_device *rdev)
{
	bool has_pg = false;

	/* only APUs have PG */
	if (rdev->flags & RADEON_IS_IGP) {
		/* XXX disable this for now */
		/* has_pg = true; */
	}

	if (has_pg) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		cik_init_gfx_cgpg(rdev);
		cik_enable_cp_pg(rdev, true);
		cik_enable_gds_pg(rdev, true);
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
5676
5677 /*
5678  * Interrupts
5679  * Starting with r6xx, interrupts are handled via a ring buffer.
5680  * Ring buffers are areas of GPU accessible memory that the GPU
5681  * writes interrupt vectors into and the host reads vectors out of.
5682  * There is a rptr (read pointer) that determines where the
5683  * host is currently reading, and a wptr (write pointer)
5684  * which determines where the GPU has written.  When the
5685  * pointers are equal, the ring is idle.  When the GPU
5686  * writes vectors to the ring buffer, it increments the
5687  * wptr.  When there is an interrupt, the host then starts
5688  * fetching commands and processing them until the pointers are
5689  * equal again at which point it updates the rptr.
5690  */
5691
5692 /**
5693  * cik_enable_interrupts - Enable the interrupt ring buffer
5694  *
5695  * @rdev: radeon_device pointer
5696  *
5697  * Enable the interrupt ring buffer (CIK).
5698  */
5699 static void cik_enable_interrupts(struct radeon_device *rdev)
5700 {
5701         u32 ih_cntl = RREG32(IH_CNTL);
5702         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5703
5704         ih_cntl |= ENABLE_INTR;
5705         ih_rb_cntl |= IH_RB_ENABLE;
5706         WREG32(IH_CNTL, ih_cntl);
5707         WREG32(IH_RB_CNTL, ih_rb_cntl);
5708         rdev->ih.enabled = true;
5709 }
5710
5711 /**
5712  * cik_disable_interrupts - Disable the interrupt ring buffer
5713  *
5714  * @rdev: radeon_device pointer
5715  *
5716  * Disable the interrupt ring buffer (CIK).
5717  */
5718 static void cik_disable_interrupts(struct radeon_device *rdev)
5719 {
5720         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5721         u32 ih_cntl = RREG32(IH_CNTL);
5722
5723         ih_rb_cntl &= ~IH_RB_ENABLE;
5724         ih_cntl &= ~ENABLE_INTR;
5725         WREG32(IH_RB_CNTL, ih_rb_cntl);
5726         WREG32(IH_CNTL, ih_cntl);
5727         /* set rptr, wptr to 0 */
5728         WREG32(IH_RB_RPTR, 0);
5729         WREG32(IH_RB_WPTR, 0);
5730         rdev->ih.enabled = false;
5731         rdev->ih.rptr = 0;
5732 }
5733
5734 /**
5735  * cik_disable_interrupt_state - Disable all interrupt sources
5736  *
5737  * @rdev: radeon_device pointer
5738  *
5739  * Clear all interrupt enable bits used by the driver (CIK).
5740  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep the polarity bit, clear the enables */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
5793
5794 /**
5795  * cik_irq_init - init and enable the interrupt ring
5796  *
5797  * @rdev: radeon_device pointer
5798  *
5799  * Allocate a ring buffer for the interrupt controller,
5800  * enable the RLC, disable interrupts, enable the IH
5801  * ring buffer and enable it (CIK).
 * Called at device load and resume.
5803  * Returns 0 for success, errors for failure.
5804  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is programmed as log2 of the dword count */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
5875
5876 /**
5877  * cik_irq_set - enable/disable interrupt sources
5878  *
5879  * @rdev: radeon_device pointer
5880  *
5881  * Enable interrupt sources on the GPU (vblanks, hpd,
5882  * etc.) (CIK).
5883  * Returns 0 for success, errors for failure.
5884  */
5885 int cik_irq_set(struct radeon_device *rdev)
5886 {
5887         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5888                 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5889         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5890         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5891         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5892         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5893         u32 grbm_int_cntl = 0;
5894         u32 dma_cntl, dma_cntl1;
5895         u32 thermal_int;
5896
5897         if (!rdev->irq.installed) {
5898                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5899                 return -EINVAL;
5900         }
5901         /* don't enable anything if the ih is disabled */
5902         if (!rdev->ih.enabled) {
5903                 cik_disable_interrupts(rdev);
5904                 /* force the active interrupt state to all disabled */
5905                 cik_disable_interrupt_state(rdev);
5906                 return 0;
5907         }
5908
5909         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5910         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5911         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5912         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5913         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5914         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5915
5916         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5917         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5918
5919         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5920         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5921         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5922         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5923         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5924         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5925         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5926         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5927
5928         if (rdev->flags & RADEON_IS_IGP)
5929                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
5930                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
5931         else
5932                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
5933                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5934
5935         /* enable CP interrupts on all rings */
5936         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5937                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5938                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5939         }
5940         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5941                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5942                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5943                 if (ring->me == 1) {
5944                         switch (ring->pipe) {
5945                         case 0:
5946                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5947                                 break;
5948                         case 1:
5949                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5950                                 break;
5951                         case 2:
5952                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5953                                 break;
5954                         case 3:
5955                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5956                                 break;
5957                         default:
5958                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5959                                 break;
5960                         }
5961                 } else if (ring->me == 2) {
5962                         switch (ring->pipe) {
5963                         case 0:
5964                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5965                                 break;
5966                         case 1:
5967                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5968                                 break;
5969                         case 2:
5970                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5971                                 break;
5972                         case 3:
5973                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5974                                 break;
5975                         default:
5976                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5977                                 break;
5978                         }
5979                 } else {
5980                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
5981                 }
5982         }
5983         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5984                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5985                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5986                 if (ring->me == 1) {
5987                         switch (ring->pipe) {
5988                         case 0:
5989                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5990                                 break;
5991                         case 1:
5992                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5993                                 break;
5994                         case 2:
5995                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5996                                 break;
5997                         case 3:
5998                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5999                                 break;
6000                         default:
6001                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6002                                 break;
6003                         }
6004                 } else if (ring->me == 2) {
6005                         switch (ring->pipe) {
6006                         case 0:
6007                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6008                                 break;
6009                         case 1:
6010                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6011                                 break;
6012                         case 2:
6013                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6014                                 break;
6015                         case 3:
6016                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6017                                 break;
6018                         default:
6019                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6020                                 break;
6021                         }
6022                 } else {
6023                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6024                 }
6025         }
6026
6027         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6028                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6029                 dma_cntl |= TRAP_ENABLE;
6030         }
6031
6032         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6033                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6034                 dma_cntl1 |= TRAP_ENABLE;
6035         }
6036
6037         if (rdev->irq.crtc_vblank_int[0] ||
6038             atomic_read(&rdev->irq.pflip[0])) {
6039                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6040                 crtc1 |= VBLANK_INTERRUPT_MASK;
6041         }
6042         if (rdev->irq.crtc_vblank_int[1] ||
6043             atomic_read(&rdev->irq.pflip[1])) {
6044                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6045                 crtc2 |= VBLANK_INTERRUPT_MASK;
6046         }
6047         if (rdev->irq.crtc_vblank_int[2] ||
6048             atomic_read(&rdev->irq.pflip[2])) {
6049                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6050                 crtc3 |= VBLANK_INTERRUPT_MASK;
6051         }
6052         if (rdev->irq.crtc_vblank_int[3] ||
6053             atomic_read(&rdev->irq.pflip[3])) {
6054                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6055                 crtc4 |= VBLANK_INTERRUPT_MASK;
6056         }
6057         if (rdev->irq.crtc_vblank_int[4] ||
6058             atomic_read(&rdev->irq.pflip[4])) {
6059                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6060                 crtc5 |= VBLANK_INTERRUPT_MASK;
6061         }
6062         if (rdev->irq.crtc_vblank_int[5] ||
6063             atomic_read(&rdev->irq.pflip[5])) {
6064                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6065                 crtc6 |= VBLANK_INTERRUPT_MASK;
6066         }
6067         if (rdev->irq.hpd[0]) {
6068                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6069                 hpd1 |= DC_HPDx_INT_EN;
6070         }
6071         if (rdev->irq.hpd[1]) {
6072                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6073                 hpd2 |= DC_HPDx_INT_EN;
6074         }
6075         if (rdev->irq.hpd[2]) {
6076                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6077                 hpd3 |= DC_HPDx_INT_EN;
6078         }
6079         if (rdev->irq.hpd[3]) {
6080                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6081                 hpd4 |= DC_HPDx_INT_EN;
6082         }
6083         if (rdev->irq.hpd[4]) {
6084                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6085                 hpd5 |= DC_HPDx_INT_EN;
6086         }
6087         if (rdev->irq.hpd[5]) {
6088                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6089                 hpd6 |= DC_HPDx_INT_EN;
6090         }
6091
6092         if (rdev->irq.dpm_thermal) {
6093                 DRM_DEBUG("dpm thermal\n");
6094                 if (rdev->flags & RADEON_IS_IGP)
6095                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6096                 else
6097                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6098         }
6099
6100         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6101
6102         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6103         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6104
6105         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6106         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6107         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6108         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6109         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6110         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6111         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6112         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6113
6114         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6115
6116         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6117         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6118         if (rdev->num_crtc >= 4) {
6119                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6120                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6121         }
6122         if (rdev->num_crtc >= 6) {
6123                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6124                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6125         }
6126
6127         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6128         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6129         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6130         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6131         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6132         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6133
6134         if (rdev->flags & RADEON_IS_IGP)
6135                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6136         else
6137                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6138
6139         return 0;
6140 }
6141
6142 /**
6143  * cik_irq_ack - ack interrupt sources
6144  *
6145  * @rdev: radeon_device pointer
6146  *
6147  * Ack interrupt sources on the GPU (vblanks, hpd,
6148  * etc.) (CIK).  Certain interrupts sources are sw
6149  * generated and do not require an explicit ack.
6150  */
6151 static inline void cik_irq_ack(struct radeon_device *rdev)
6152 {
6153         u32 tmp;
6154
6155         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6156         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6157         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6158         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6159         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6160         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6161         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6162
6163         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6164                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6165         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6166                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6167         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6168                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6169         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6170                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6171
6172         if (rdev->num_crtc >= 4) {
6173                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6174                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6175                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6176                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6177                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6178                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6179                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6180                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6181         }
6182
6183         if (rdev->num_crtc >= 6) {
6184                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6185                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6186                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6187                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6188                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6189                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6190                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6191                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6192         }
6193
6194         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6195                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6196                 tmp |= DC_HPDx_INT_ACK;
6197                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6198         }
6199         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6200                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6201                 tmp |= DC_HPDx_INT_ACK;
6202                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6203         }
6204         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6205                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6206                 tmp |= DC_HPDx_INT_ACK;
6207                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6208         }
6209         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6210                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6211                 tmp |= DC_HPDx_INT_ACK;
6212                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6213         }
6214         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6215                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6216                 tmp |= DC_HPDx_INT_ACK;
6217                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6218         }
6219         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6220                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6221                 tmp |= DC_HPDx_INT_ACK;
6222                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6223         }
6224 }
6225
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq: give any in-flight interrupt time to
	 * land before acking, then force all sources off.
	 */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
6241
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
6255
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before tearing down the ring it writes into */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6270
6271 /**
6272  * cik_get_ih_wptr - get the IH ring buffer wptr
6273  *
6274  * @rdev: radeon_device pointer
6275  *
6276  * Get the IH ring buffer wptr from either the register
6277  * or the writeback memory buffer (CIK).  Also check for
6278  * ring buffer overflow and deal with it.
6279  * Used by cik_irq_process().
6280  * Returns the value of the wptr.
6281  */
6282 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6283 {
6284         u32 wptr, tmp;
6285
6286         if (rdev->wb.enabled)
6287                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6288         else
6289                 wptr = RREG32(IH_RB_WPTR);
6290
6291         if (wptr & RB_OVERFLOW) {
6292                 /* When a ring buffer overflow happen start parsing interrupt
6293                  * from the last not overwritten vector (wptr + 16). Hopefully
6294                  * this should allow us to catchup.
6295                  */
6296                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6297                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6298                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6299                 tmp = RREG32(IH_RB_CNTL);
6300                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6301                 WREG32(IH_RB_CNTL, tmp);
6302         }
6303         return (wptr & rdev->ih.ptr_mask);
6304 }
6305
6306 /*        CIK IV Ring
6307  * Each IV ring entry is 128 bits:
6308  * [7:0]    - interrupt source id
6309  * [31:8]   - reserved
6310  * [59:32]  - interrupt source data
6311  * [63:60]  - reserved
6312  * [71:64]  - RINGID
6313  *            CP:
6314  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6315  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6316  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6317  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6318  *            PIPE_ID - ME0 0=3D
6319  *                    - ME1&2 compute dispatcher (4 pipes each)
6320  *            SDMA:
6321  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6322  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6323  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6324  * [79:72]  - VMID
6325  * [95:80]  - PASID
6326  * [127:96] - reserved
6327  */
6328 /**
6329  * cik_irq_process - interrupt handler
6330  *
6331  * @rdev: radeon_device pointer
6332  *
6333  * Interrupt hander (CIK).  Walk the IH ring,
6334  * ack interrupts and schedule work to handle
6335  * interrupt events.
6336  * Returns irq process return code.
6337  */
6338 int cik_irq_process(struct radeon_device *rdev)
6339 {
6340         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6341         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6342         u32 wptr;
6343         u32 rptr;
6344         u32 src_id, src_data, ring_id;
6345         u8 me_id, pipe_id, queue_id;
6346         u32 ring_index;
6347         bool queue_hotplug = false;
6348         bool queue_reset = false;
6349         u32 addr, status, mc_client;
6350         bool queue_thermal = false;
6351
6352         if (!rdev->ih.enabled || rdev->shutdown)
6353                 return IRQ_NONE;
6354
6355         wptr = cik_get_ih_wptr(rdev);
6356
6357 restart_ih:
6358         /* is somebody else already processing irqs? */
6359         if (atomic_xchg(&rdev->ih.lock, 1))
6360                 return IRQ_NONE;
6361
6362         rptr = rdev->ih.rptr;
6363         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6364
6365         /* Order reading of wptr vs. reading of IH ring data */
6366         rmb();
6367
6368         /* display interrupts */
6369         cik_irq_ack(rdev);
6370
6371         while (rptr != wptr) {
6372                 /* wptr/rptr are in bytes! */
6373                 ring_index = rptr / 4;
6374                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6375                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6376                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6377
6378                 switch (src_id) {
6379                 case 1: /* D1 vblank/vline */
6380                         switch (src_data) {
6381                         case 0: /* D1 vblank */
6382                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6383                                         if (rdev->irq.crtc_vblank_int[0]) {
6384                                                 drm_handle_vblank(rdev->ddev, 0);
6385                                                 rdev->pm.vblank_sync = true;
6386                                                 wake_up(&rdev->irq.vblank_queue);
6387                                         }
6388                                         if (atomic_read(&rdev->irq.pflip[0]))
6389                                                 radeon_crtc_handle_flip(rdev, 0);
6390                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6391                                         DRM_DEBUG("IH: D1 vblank\n");
6392                                 }
6393                                 break;
6394                         case 1: /* D1 vline */
6395                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6396                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6397                                         DRM_DEBUG("IH: D1 vline\n");
6398                                 }
6399                                 break;
6400                         default:
6401                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6402                                 break;
6403                         }
6404                         break;
6405                 case 2: /* D2 vblank/vline */
6406                         switch (src_data) {
6407                         case 0: /* D2 vblank */
6408                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6409                                         if (rdev->irq.crtc_vblank_int[1]) {
6410                                                 drm_handle_vblank(rdev->ddev, 1);
6411                                                 rdev->pm.vblank_sync = true;
6412                                                 wake_up(&rdev->irq.vblank_queue);
6413                                         }
6414                                         if (atomic_read(&rdev->irq.pflip[1]))
6415                                                 radeon_crtc_handle_flip(rdev, 1);
6416                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6417                                         DRM_DEBUG("IH: D2 vblank\n");
6418                                 }
6419                                 break;
6420                         case 1: /* D2 vline */
6421                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6422                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6423                                         DRM_DEBUG("IH: D2 vline\n");
6424                                 }
6425                                 break;
6426                         default:
6427                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6428                                 break;
6429                         }
6430                         break;
6431                 case 3: /* D3 vblank/vline */
6432                         switch (src_data) {
6433                         case 0: /* D3 vblank */
6434                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6435                                         if (rdev->irq.crtc_vblank_int[2]) {
6436                                                 drm_handle_vblank(rdev->ddev, 2);
6437                                                 rdev->pm.vblank_sync = true;
6438                                                 wake_up(&rdev->irq.vblank_queue);
6439                                         }
6440                                         if (atomic_read(&rdev->irq.pflip[2]))
6441                                                 radeon_crtc_handle_flip(rdev, 2);
6442                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6443                                         DRM_DEBUG("IH: D3 vblank\n");
6444                                 }
6445                                 break;
6446                         case 1: /* D3 vline */
6447                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6448                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6449                                         DRM_DEBUG("IH: D3 vline\n");
6450                                 }
6451                                 break;
6452                         default:
6453                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6454                                 break;
6455                         }
6456                         break;
6457                 case 4: /* D4 vblank/vline */
6458                         switch (src_data) {
6459                         case 0: /* D4 vblank */
6460                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6461                                         if (rdev->irq.crtc_vblank_int[3]) {
6462                                                 drm_handle_vblank(rdev->ddev, 3);
6463                                                 rdev->pm.vblank_sync = true;
6464                                                 wake_up(&rdev->irq.vblank_queue);
6465                                         }
6466                                         if (atomic_read(&rdev->irq.pflip[3]))
6467                                                 radeon_crtc_handle_flip(rdev, 3);
6468                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6469                                         DRM_DEBUG("IH: D4 vblank\n");
6470                                 }
6471                                 break;
6472                         case 1: /* D4 vline */
6473                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6474                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6475                                         DRM_DEBUG("IH: D4 vline\n");
6476                                 }
6477                                 break;
6478                         default:
6479                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6480                                 break;
6481                         }
6482                         break;
6483                 case 5: /* D5 vblank/vline */
6484                         switch (src_data) {
6485                         case 0: /* D5 vblank */
6486                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6487                                         if (rdev->irq.crtc_vblank_int[4]) {
6488                                                 drm_handle_vblank(rdev->ddev, 4);
6489                                                 rdev->pm.vblank_sync = true;
6490                                                 wake_up(&rdev->irq.vblank_queue);
6491                                         }
6492                                         if (atomic_read(&rdev->irq.pflip[4]))
6493                                                 radeon_crtc_handle_flip(rdev, 4);
6494                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6495                                         DRM_DEBUG("IH: D5 vblank\n");
6496                                 }
6497                                 break;
6498                         case 1: /* D5 vline */
6499                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6500                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6501                                         DRM_DEBUG("IH: D5 vline\n");
6502                                 }
6503                                 break;
6504                         default:
6505                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6506                                 break;
6507                         }
6508                         break;
6509                 case 6: /* D6 vblank/vline */
6510                         switch (src_data) {
6511                         case 0: /* D6 vblank */
6512                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6513                                         if (rdev->irq.crtc_vblank_int[5]) {
6514                                                 drm_handle_vblank(rdev->ddev, 5);
6515                                                 rdev->pm.vblank_sync = true;
6516                                                 wake_up(&rdev->irq.vblank_queue);
6517                                         }
6518                                         if (atomic_read(&rdev->irq.pflip[5]))
6519                                                 radeon_crtc_handle_flip(rdev, 5);
6520                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6521                                         DRM_DEBUG("IH: D6 vblank\n");
6522                                 }
6523                                 break;
6524                         case 1: /* D6 vline */
6525                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6526                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6527                                         DRM_DEBUG("IH: D6 vline\n");
6528                                 }
6529                                 break;
6530                         default:
6531                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6532                                 break;
6533                         }
6534                         break;
6535                 case 42: /* HPD hotplug */
6536                         switch (src_data) {
6537                         case 0:
6538                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6539                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6540                                         queue_hotplug = true;
6541                                         DRM_DEBUG("IH: HPD1\n");
6542                                 }
6543                                 break;
6544                         case 1:
6545                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6546                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6547                                         queue_hotplug = true;
6548                                         DRM_DEBUG("IH: HPD2\n");
6549                                 }
6550                                 break;
6551                         case 2:
6552                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6553                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6554                                         queue_hotplug = true;
6555                                         DRM_DEBUG("IH: HPD3\n");
6556                                 }
6557                                 break;
6558                         case 3:
6559                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6560                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6561                                         queue_hotplug = true;
6562                                         DRM_DEBUG("IH: HPD4\n");
6563                                 }
6564                                 break;
6565                         case 4:
6566                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6567                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6568                                         queue_hotplug = true;
6569                                         DRM_DEBUG("IH: HPD5\n");
6570                                 }
6571                                 break;
6572                         case 5:
6573                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6574                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6575                                         queue_hotplug = true;
6576                                         DRM_DEBUG("IH: HPD6\n");
6577                                 }
6578                                 break;
6579                         default:
6580                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6581                                 break;
6582                         }
6583                         break;
6584                 case 146:
6585                 case 147:
6586                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6587                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6588                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6589                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6590                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6591                                 addr);
6592                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6593                                 status);
6594                         cik_vm_decode_fault(rdev, status, addr, mc_client);
6595                         /* reset addr and status */
6596                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6597                         break;
6598                 case 176: /* GFX RB CP_INT */
6599                 case 177: /* GFX IB CP_INT */
6600                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6601                         break;
6602                 case 181: /* CP EOP event */
6603                         DRM_DEBUG("IH: CP EOP\n");
6604                         /* XXX check the bitfield order! */
6605                         me_id = (ring_id & 0x60) >> 5;
6606                         pipe_id = (ring_id & 0x18) >> 3;
6607                         queue_id = (ring_id & 0x7) >> 0;
6608                         switch (me_id) {
6609                         case 0:
6610                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6611                                 break;
6612                         case 1:
6613                         case 2:
6614                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6615                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6616                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6617                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6618                                 break;
6619                         }
6620                         break;
6621                 case 184: /* CP Privileged reg access */
6622                         DRM_ERROR("Illegal register access in command stream\n");
6623                         /* XXX check the bitfield order! */
6624                         me_id = (ring_id & 0x60) >> 5;
6625                         pipe_id = (ring_id & 0x18) >> 3;
6626                         queue_id = (ring_id & 0x7) >> 0;
6627                         switch (me_id) {
6628                         case 0:
6629                                 /* This results in a full GPU reset, but all we need to do is soft
6630                                  * reset the CP for gfx
6631                                  */
6632                                 queue_reset = true;
6633                                 break;
6634                         case 1:
6635                                 /* XXX compute */
6636                                 queue_reset = true;
6637                                 break;
6638                         case 2:
6639                                 /* XXX compute */
6640                                 queue_reset = true;
6641                                 break;
6642                         }
6643                         break;
6644                 case 185: /* CP Privileged inst */
6645                         DRM_ERROR("Illegal instruction in command stream\n");
6646                         /* XXX check the bitfield order! */
6647                         me_id = (ring_id & 0x60) >> 5;
6648                         pipe_id = (ring_id & 0x18) >> 3;
6649                         queue_id = (ring_id & 0x7) >> 0;
6650                         switch (me_id) {
6651                         case 0:
6652                                 /* This results in a full GPU reset, but all we need to do is soft
6653                                  * reset the CP for gfx
6654                                  */
6655                                 queue_reset = true;
6656                                 break;
6657                         case 1:
6658                                 /* XXX compute */
6659                                 queue_reset = true;
6660                                 break;
6661                         case 2:
6662                                 /* XXX compute */
6663                                 queue_reset = true;
6664                                 break;
6665                         }
6666                         break;
6667                 case 224: /* SDMA trap event */
6668                         /* XXX check the bitfield order! */
6669                         me_id = (ring_id & 0x3) >> 0;
6670                         queue_id = (ring_id & 0xc) >> 2;
6671                         DRM_DEBUG("IH: SDMA trap\n");
6672                         switch (me_id) {
6673                         case 0:
6674                                 switch (queue_id) {
6675                                 case 0:
6676                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6677                                         break;
6678                                 case 1:
6679                                         /* XXX compute */
6680                                         break;
6681                                 case 2:
6682                                         /* XXX compute */
6683                                         break;
6684                                 }
6685                                 break;
6686                         case 1:
6687                                 switch (queue_id) {
6688                                 case 0:
6689                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6690                                         break;
6691                                 case 1:
6692                                         /* XXX compute */
6693                                         break;
6694                                 case 2:
6695                                         /* XXX compute */
6696                                         break;
6697                                 }
6698                                 break;
6699                         }
6700                         break;
6701                 case 230: /* thermal low to high */
6702                         DRM_DEBUG("IH: thermal low to high\n");
6703                         rdev->pm.dpm.thermal.high_to_low = false;
6704                         queue_thermal = true;
6705                         break;
6706                 case 231: /* thermal high to low */
6707                         DRM_DEBUG("IH: thermal high to low\n");
6708                         rdev->pm.dpm.thermal.high_to_low = true;
6709                         queue_thermal = true;
6710                         break;
6711                 case 233: /* GUI IDLE */
6712                         DRM_DEBUG("IH: GUI idle\n");
6713                         break;
6714                 case 241: /* SDMA Privileged inst */
6715                 case 247: /* SDMA Privileged inst */
6716                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
6717                         /* XXX check the bitfield order! */
6718                         me_id = (ring_id & 0x3) >> 0;
6719                         queue_id = (ring_id & 0xc) >> 2;
6720                         switch (me_id) {
6721                         case 0:
6722                                 switch (queue_id) {
6723                                 case 0:
6724                                         queue_reset = true;
6725                                         break;
6726                                 case 1:
6727                                         /* XXX compute */
6728                                         queue_reset = true;
6729                                         break;
6730                                 case 2:
6731                                         /* XXX compute */
6732                                         queue_reset = true;
6733                                         break;
6734                                 }
6735                                 break;
6736                         case 1:
6737                                 switch (queue_id) {
6738                                 case 0:
6739                                         queue_reset = true;
6740                                         break;
6741                                 case 1:
6742                                         /* XXX compute */
6743                                         queue_reset = true;
6744                                         break;
6745                                 case 2:
6746                                         /* XXX compute */
6747                                         queue_reset = true;
6748                                         break;
6749                                 }
6750                                 break;
6751                         }
6752                         break;
6753                 default:
6754                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6755                         break;
6756                 }
6757
6758                 /* wptr/rptr are in bytes! */
6759                 rptr += 16;
6760                 rptr &= rdev->ih.ptr_mask;
6761         }
6762         if (queue_hotplug)
6763                 schedule_work(&rdev->hotplug_work);
6764         if (queue_reset)
6765                 schedule_work(&rdev->reset_work);
6766         if (queue_thermal)
6767                 schedule_work(&rdev->pm.dpm.thermal.work);
6768         rdev->ih.rptr = rptr;
6769         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6770         atomic_set(&rdev->ih.lock, 0);
6771
6772         /* make sure wptr hasn't changed while processing */
6773         wptr = cik_get_ih_wptr(rdev);
6774         if (wptr != rptr)
6775                 goto restart_ih;
6776
6777         return IRQ_HANDLED;
6778 }
6779
6780 /*
6781  * startup/shutdown callbacks
6782  */
6783 /**
6784  * cik_startup - program the asic to a functional state
6785  *
6786  * @rdev: radeon_device pointer
6787  *
6788  * Programs the asic to a functional state (CIK).
6789  * Called by cik_init() and cik_resume().
6790  * Returns 0 for success, error for failure.
6791  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* program the memory controller before anything below touches vram */
	cik_mc_program(rdev);

	/* load the ucode images if they are not already cached; dGPUs
	 * (non-IGP) additionally require MC firmware and must load it
	 * into the hw here.
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* GART must be up before the rings/IBs below can be mapped */
	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the save/restore register list for this IGP family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring we intend to use:
	 * gfx, two compute rings, and two SDMA rings
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure the ring size is zeroed so the
	 * UVD ring init further down is skipped instead of failing startup
	 */
	r = uvd_v4_2_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	/* the two SDMA engines share the same register layout at
	 * different per-instance offsets
	 */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size is zero here if UVD resume failed above */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7013
7014 /**
7015  * cik_resume - resume the asic to a functional state
7016  *
7017  * @rdev: radeon_device pointer
7018  *
7019  * Programs the asic to a functional state (CIK).
7020  * Called at resume.
7021  * Returns 0 for success, error for failure.
7022  */
7023 int cik_resume(struct radeon_device *rdev)
7024 {
7025         int r;
7026
7027         /* post card */
7028         atom_asic_init(rdev->mode_info.atom_context);
7029
7030         /* init golden registers */
7031         cik_init_golden_registers(rdev);
7032
7033         rdev->accel_working = true;
7034         r = cik_startup(rdev);
7035         if (r) {
7036                 DRM_ERROR("cik startup failed on resume\n");
7037                 rdev->accel_working = false;
7038                 return r;
7039         }
7040
7041         return r;
7042
7043 }
7044
7045 /**
7046  * cik_suspend - suspend the asic
7047  *
7048  * @rdev: radeon_device pointer
7049  *
7050  * Bring the chip into a state suitable for suspend (CIK).
7051  * Called at suspend.
7052  * Returns 0 for success.
7053  */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop the consumers of the hw first */
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and both SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	/* quiesce interrupts and writeback before tearing down GART */
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	/* disable GART translation last */
	cik_pcie_gart_disable(rdev);
	/* always succeeds */
	return 0;
}
7067
/* Plan is to move initialization into that function and use
 * helper functions so that radeon_device_init pretty much
 * does nothing more than call asic specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
7074 /**
7075  * cik_init - asic specific driver and hw init
7076  *
7077  * @rdev: radeon_device pointer
7078  *
7079  * Setup asic specific driver variables and program the hw
7080  * to a functional state (CIK).
7081  * Called at driver startup.
7082  * Returns 0 for success, errors for failure.
7083  */
7084 int cik_init(struct radeon_device *rdev)
7085 {
7086         struct radeon_ring *ring;
7087         int r;
7088
7089         /* Read BIOS */
7090         if (!radeon_get_bios(rdev)) {
7091                 if (ASIC_IS_AVIVO(rdev))
7092                         return -EINVAL;
7093         }
7094         /* Must be an ATOMBIOS */
7095         if (!rdev->is_atom_bios) {
7096                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7097                 return -EINVAL;
7098         }
7099         r = radeon_atombios_init(rdev);
7100         if (r)
7101                 return r;
7102
7103         /* Post card if necessary */
7104         if (!radeon_card_posted(rdev)) {
7105                 if (!rdev->bios) {
7106                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7107                         return -EINVAL;
7108                 }
7109                 DRM_INFO("GPU not posted. posting now...\n");
7110                 atom_asic_init(rdev->mode_info.atom_context);
7111         }
7112         /* init golden registers */
7113         cik_init_golden_registers(rdev);
7114         /* Initialize scratch registers */
7115         cik_scratch_init(rdev);
7116         /* Initialize surface registers */
7117         radeon_surface_init(rdev);
7118         /* Initialize clocks */
7119         radeon_get_clock_info(rdev->ddev);
7120
7121         /* Fence driver */
7122         r = radeon_fence_driver_init(rdev);
7123         if (r)
7124                 return r;
7125
7126         /* initialize memory controller */
7127         r = cik_mc_init(rdev);
7128         if (r)
7129                 return r;
7130         /* Memory manager */
7131         r = radeon_bo_init(rdev);
7132         if (r)
7133                 return r;
7134
7135         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7136         ring->ring_obj = NULL;
7137         r600_ring_init(rdev, ring, 1024 * 1024);
7138
7139         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7140         ring->ring_obj = NULL;
7141         r600_ring_init(rdev, ring, 1024 * 1024);
7142         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7143         if (r)
7144                 return r;
7145
7146         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7147         ring->ring_obj = NULL;
7148         r600_ring_init(rdev, ring, 1024 * 1024);
7149         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7150         if (r)
7151                 return r;
7152
7153         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7154         ring->ring_obj = NULL;
7155         r600_ring_init(rdev, ring, 256 * 1024);
7156
7157         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7158         ring->ring_obj = NULL;
7159         r600_ring_init(rdev, ring, 256 * 1024);
7160
7161         r = radeon_uvd_init(rdev);
7162         if (!r) {
7163                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7164                 ring->ring_obj = NULL;
7165                 r600_ring_init(rdev, ring, 4096);
7166         }
7167
7168         rdev->ih.ring_obj = NULL;
7169         r600_ih_ring_init(rdev, 64 * 1024);
7170
7171         r = r600_pcie_gart_init(rdev);
7172         if (r)
7173                 return r;
7174
7175         rdev->accel_working = true;
7176         r = cik_startup(rdev);
7177         if (r) {
7178                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7179                 cik_cp_fini(rdev);
7180                 cik_sdma_fini(rdev);
7181                 cik_irq_fini(rdev);
7182                 sumo_rlc_fini(rdev);
7183                 cik_mec_fini(rdev);
7184                 radeon_wb_fini(rdev);
7185                 radeon_ib_pool_fini(rdev);
7186                 radeon_vm_manager_fini(rdev);
7187                 radeon_irq_kms_fini(rdev);
7188                 cik_pcie_gart_fini(rdev);
7189                 rdev->accel_working = false;
7190         }
7191
7192         /* Don't start up if the MC ucode is missing.
7193          * The default clocks and voltages before the MC ucode
7194          * is loaded are not suffient for advanced operations.
7195          */
7196         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7197                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7198                 return -EINVAL;
7199         }
7200
7201         return 0;
7202 }
7203
7204 /**
7205  * cik_fini - asic specific driver and hw fini
7206  *
7207  * @rdev: radeon_device pointer
7208  *
7209  * Tear down the asic specific driver variables and program the hw
7210  * to an idle state (CIK).
7211  * Called at driver unload.
7212  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down roughly in the reverse order of cik_startup()/cik_init() */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* clear the stale pointer so later accesses fail loudly */
	rdev->bios = NULL;
}
7235
7236 /* display watermark setup */
7237 /**
7238  * dce8_line_buffer_adjust - Set up the line buffer
7239  *
7240  * @rdev: radeon_device pointer
7241  * @radeon_crtc: the selected display controller
7242  * @mode: the current display mode on the selected display
7243  * controller
7244  *
7245  * Setup up the line buffer allocation for
7246  * the selected display controller (CIK).
7247  * Returns the line buffer size in pixels.
7248  */
7249 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7250                                    struct radeon_crtc *radeon_crtc,
7251                                    struct drm_display_mode *mode)
7252 {
7253         u32 tmp;
7254
7255         /*
7256          * Line Buffer Setup
7257          * There are 6 line buffers, one for each display controllers.
7258          * There are 3 partitions per LB. Select the number of partitions
7259          * to enable based on the display width.  For display widths larger
7260          * than 4096, you need use to use 2 display controllers and combine
7261          * them using the stereo blender.
7262          */
7263         if (radeon_crtc->base.enabled && mode) {
7264                 if (mode->crtc_hdisplay < 1920)
7265                         tmp = 1;
7266                 else if (mode->crtc_hdisplay < 2560)
7267                         tmp = 2;
7268                 else if (mode->crtc_hdisplay < 4096)
7269                         tmp = 0;
7270                 else {
7271                         DRM_DEBUG_KMS("Mode too big for LB!\n");
7272                         tmp = 0;
7273                 }
7274         } else
7275                 tmp = 1;
7276
7277         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7278                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7279
7280         if (radeon_crtc->base.enabled && mode) {
7281                 switch (tmp) {
7282                 case 0:
7283                 default:
7284                         return 4096 * 2;
7285                 case 1:
7286                         return 1920 * 2;
7287                 case 2:
7288                         return 2560 * 2;
7289                 }
7290         }
7291
7292         /* controller not enabled, so no lb used */
7293         return 0;
7294 }
7295
7296 /**
7297  * cik_get_number_of_dram_channels - get the number of dram channels
7298  *
7299  * @rdev: radeon_device pointer
7300  *
7301  * Look up the number of video ram channels (CIK).
7302  * Used for display watermark bandwidth calculations
7303  * Returns the number of dram channels
7304  */
7305 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7306 {
7307         u32 tmp = RREG32(MC_SHARED_CHMAP);
7308
7309         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7310         case 0:
7311         default:
7312                 return 1;
7313         case 1:
7314                 return 2;
7315         case 2:
7316                 return 4;
7317         case 3:
7318                 return 8;
7319         case 4:
7320                 return 3;
7321         case 5:
7322                 return 6;
7323         case 6:
7324                 return 10;
7325         case 7:
7326                 return 12;
7327         case 8:
7328                 return 16;
7329         }
7330 }
7331
/* Input parameters for the DCE8 display watermark calculations.
 * Filled in by dce8_program_watermarks() and consumed by the
 * dce8_*_bandwidth()/dce8_latency_watermark() helpers below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
7347
7348 /**
7349  * dce8_dram_bandwidth - get the dram bandwidth
7350  *
7351  * @wm: watermark calculation data
7352  *
7353  * Calculate the raw dram bandwidth (CIK).
7354  * Used for display watermark bandwidth calculations
7355  * Returns the dram bandwidth in MBytes/s
7356  */
7357 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7358 {
7359         /* Calculate raw DRAM Bandwidth */
7360         fixed20_12 dram_efficiency; /* 0.7 */
7361         fixed20_12 yclk, dram_channels, bandwidth;
7362         fixed20_12 a;
7363
7364         a.full = dfixed_const(1000);
7365         yclk.full = dfixed_const(wm->yclk);
7366         yclk.full = dfixed_div(yclk, a);
7367         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7368         a.full = dfixed_const(10);
7369         dram_efficiency.full = dfixed_const(7);
7370         dram_efficiency.full = dfixed_div(dram_efficiency, a);
7371         bandwidth.full = dfixed_mul(dram_channels, yclk);
7372         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7373
7374         return dfixed_trunc(bandwidth);
7375 }
7376
7377 /**
7378  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7379  *
7380  * @wm: watermark calculation data
7381  *
7382  * Calculate the dram bandwidth used for display (CIK).
7383  * Used for display watermark bandwidth calculations
7384  * Returns the dram bandwidth for display in MBytes/s
7385  */
7386 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7387 {
7388         /* Calculate DRAM Bandwidth and the part allocated to display. */
7389         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7390         fixed20_12 yclk, dram_channels, bandwidth;
7391         fixed20_12 a;
7392
7393         a.full = dfixed_const(1000);
7394         yclk.full = dfixed_const(wm->yclk);
7395         yclk.full = dfixed_div(yclk, a);
7396         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7397         a.full = dfixed_const(10);
7398         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7399         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7400         bandwidth.full = dfixed_mul(dram_channels, yclk);
7401         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7402
7403         return dfixed_trunc(bandwidth);
7404 }
7405
7406 /**
7407  * dce8_data_return_bandwidth - get the data return bandwidth
7408  *
7409  * @wm: watermark calculation data
7410  *
7411  * Calculate the data return bandwidth used for display (CIK).
7412  * Used for display watermark bandwidth calculations
7413  * Returns the data return bandwidth in MBytes/s
7414  */
7415 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7416 {
7417         /* Calculate the display Data return Bandwidth */
7418         fixed20_12 return_efficiency; /* 0.8 */
7419         fixed20_12 sclk, bandwidth;
7420         fixed20_12 a;
7421
7422         a.full = dfixed_const(1000);
7423         sclk.full = dfixed_const(wm->sclk);
7424         sclk.full = dfixed_div(sclk, a);
7425         a.full = dfixed_const(10);
7426         return_efficiency.full = dfixed_const(8);
7427         return_efficiency.full = dfixed_div(return_efficiency, a);
7428         a.full = dfixed_const(32);
7429         bandwidth.full = dfixed_mul(a, sclk);
7430         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7431
7432         return dfixed_trunc(bandwidth);
7433 }
7434
7435 /**
7436  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7437  *
7438  * @wm: watermark calculation data
7439  *
7440  * Calculate the dmif bandwidth used for display (CIK).
7441  * Used for display watermark bandwidth calculations
7442  * Returns the dmif bandwidth in MBytes/s
7443  */
7444 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7445 {
7446         /* Calculate the DMIF Request Bandwidth */
7447         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7448         fixed20_12 disp_clk, bandwidth;
7449         fixed20_12 a, b;
7450
7451         a.full = dfixed_const(1000);
7452         disp_clk.full = dfixed_const(wm->disp_clk);
7453         disp_clk.full = dfixed_div(disp_clk, a);
7454         a.full = dfixed_const(32);
7455         b.full = dfixed_mul(a, disp_clk);
7456
7457         a.full = dfixed_const(10);
7458         disp_clk_request_efficiency.full = dfixed_const(8);
7459         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7460
7461         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7462
7463         return dfixed_trunc(bandwidth);
7464 }
7465
7466 /**
7467  * dce8_available_bandwidth - get the min available bandwidth
7468  *
7469  * @wm: watermark calculation data
7470  *
7471  * Calculate the min available bandwidth used for display (CIK).
7472  * Used for display watermark bandwidth calculations
7473  * Returns the min available bandwidth in MBytes/s
7474  */
7475 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7476 {
7477         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7478         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7479         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7480         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7481
7482         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7483 }
7484
7485 /**
7486  * dce8_average_bandwidth - get the average available bandwidth
7487  *
7488  * @wm: watermark calculation data
7489  *
7490  * Calculate the average available bandwidth used for display (CIK).
7491  * Used for display watermark bandwidth calculations
7492  * Returns the average available bandwidth in MBytes/s
7493  */
7494 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7495 {
7496         /* Calculate the display mode Average Bandwidth
7497          * DisplayMode should contain the source and destination dimensions,
7498          * timing, etc.
7499          */
7500         fixed20_12 bpp;
7501         fixed20_12 line_time;
7502         fixed20_12 src_width;
7503         fixed20_12 bandwidth;
7504         fixed20_12 a;
7505
7506         a.full = dfixed_const(1000);
7507         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7508         line_time.full = dfixed_div(line_time, a);
7509         bpp.full = dfixed_const(wm->bytes_per_pixel);
7510         src_width.full = dfixed_const(wm->src_width);
7511         bandwidth.full = dfixed_mul(src_width, bpp);
7512         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7513         bandwidth.full = dfixed_div(bandwidth, line_time);
7514
7515         return dfixed_trunc(bandwidth);
7516 }
7517
7518 /**
7519  * dce8_latency_watermark - get the latency watermark
7520  *
7521  * @wm: watermark calculation data
7522  *
7523  * Calculate the latency watermark (CIK).
7524  * Used for display watermark bandwidth calculations
7525  * Returns the latency watermark in ns
7526  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time for a worst-case 512-byte x 8 chunk to come back at the
	 * available bandwidth (MB/s -> the *1000 converts units) */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* account for the chunk/cursor traffic of the other active heads
	 * competing for the same return path */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to hide, watermark is 0 */
	if (wm->num_heads == 0)
		return 0;

	/* decide how many source lines feed one destination line:
	 * heavy downscaling, many vtaps or interlace need 4, else 2 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = per-head share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk):
	 * bandwidth the DMIF buffer can sustain over the latency window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk * bytes_per_pixel: rate at which the pipe can
	 * consume line buffer data */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill rate is limited by both of the above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time (ns) to fill the lines needed for one destination line */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills faster than the active time, the raw latency
	 * dominates; otherwise add the extra fill time on top */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
7589
7590 /**
7591  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7592  * average and available dram bandwidth
7593  *
7594  * @wm: watermark calculation data
7595  *
7596  * Check if the display average bandwidth fits in the display
7597  * dram bandwidth (CIK).
7598  * Used for display watermark bandwidth calculations
7599  * Returns true if the display fits, false if not.
7600  */
7601 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7602 {
7603         if (dce8_average_bandwidth(wm) <=
7604             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7605                 return true;
7606         else
7607                 return false;
7608 }
7609
7610 /**
7611  * dce8_average_bandwidth_vs_available_bandwidth - check
7612  * average and available bandwidth
7613  *
7614  * @wm: watermark calculation data
7615  *
7616  * Check if the display average bandwidth fits in the display
7617  * available bandwidth (CIK).
7618  * Used for display watermark bandwidth calculations
7619  * Returns true if the display fits, false if not.
7620  */
7621 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7622 {
7623         if (dce8_average_bandwidth(wm) <=
7624             (dce8_available_bandwidth(wm) / wm->num_heads))
7625                 return true;
7626         else
7627                 return false;
7628 }
7629
7630 /**
7631  * dce8_check_latency_hiding - check latency hiding
7632  *
7633  * @wm: watermark calculation data
7634  *
7635  * Check latency hiding (CIK).
7636  * Used for display watermark bandwidth calculations
7637  * Returns true if the display fits, false if not.
7638  */
7639 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7640 {
7641         u32 lb_partitions = wm->lb_size / wm->src_width;
7642         u32 line_time = wm->active_time + wm->blank_time;
7643         u32 latency_tolerant_lines;
7644         u32 latency_hiding;
7645         fixed20_12 a;
7646
7647         a.full = dfixed_const(1);
7648         if (wm->vsc.full > a.full)
7649                 latency_tolerant_lines = 1;
7650         else {
7651                 if (lb_partitions <= (wm->vtaps + 1))
7652                         latency_tolerant_lines = 1;
7653                 else
7654                         latency_tolerant_lines = 2;
7655         }
7656
7657         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7658
7659         if (dce8_latency_watermark(wm) <= latency_hiding)
7660                 return true;
7661         else
7662                 return false;
7663 }
7664
7665 /**
7666  * dce8_program_watermarks - program display watermarks
7667  *
7668  * @rdev: radeon_device pointer
7669  * @radeon_crtc: the selected display controller
7670  * @lb_size: line buffer size
7671  * @num_heads: number of display controllers in use
7672  *
7673  * Calculate and program the display watermarks for the
7674  * selected display controller (CIK).
7675  */
static void dce8_program_watermarks(struct radeon_device *rdev,
                                    struct radeon_crtc *radeon_crtc,
                                    u32 lb_size, u32 num_heads)
{
        struct drm_display_mode *mode = &radeon_crtc->base.mode;
        struct dce8_wm_params wm_low, wm_high;
        u32 pixel_period;
        u32 line_time = 0;
        u32 latency_watermark_a = 0, latency_watermark_b = 0;
        u32 tmp, wm_mask;

        /* only compute real watermarks when the crtc is active; otherwise
         * the zero-initialized values below are programmed */
        if (radeon_crtc->base.enabled && num_heads && mode) {
                /* NOTE(review): mode->clock is assumed non-zero for an
                 * enabled crtc; a zero clock would divide by zero here */
                pixel_period = 1000000 / (u32)mode->clock;
                /* line time in ns, clamped to the 16-bit register field */
                line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

                /* watermark for high clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
                    rdev->pm.dpm_enabled) {
                        /* use the max (non-low) dpm clocks, in 10 kHz units */
                        wm_high.yclk =
                                radeon_dpm_get_mclk(rdev, false) * 10;
                        wm_high.sclk =
                                radeon_dpm_get_sclk(rdev, false) * 10;
                } else {
                        wm_high.yclk = rdev->pm.current_mclk * 10;
                        wm_high.sclk = rdev->pm.current_sclk * 10;
                }

                wm_high.disp_clk = mode->clock;
                wm_high.src_width = mode->crtc_hdisplay;
                wm_high.active_time = mode->crtc_hdisplay * pixel_period;
                wm_high.blank_time = line_time - wm_high.active_time;
                wm_high.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_high.interlaced = true;
                wm_high.vsc = radeon_crtc->vsc;
                wm_high.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_high.vtaps = 2;
                wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_high.lb_size = lb_size;
                wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
                wm_high.num_heads = num_heads;

                /* set for high clocks */
                latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
                    !dce8_check_latency_hiding(&wm_high) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                }

                /* watermark for low clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
                    rdev->pm.dpm_enabled) {
                        /* use the low dpm clocks, in 10 kHz units */
                        wm_low.yclk =
                                radeon_dpm_get_mclk(rdev, true) * 10;
                        wm_low.sclk =
                                radeon_dpm_get_sclk(rdev, true) * 10;
                } else {
                        wm_low.yclk = rdev->pm.current_mclk * 10;
                        wm_low.sclk = rdev->pm.current_sclk * 10;
                }

                wm_low.disp_clk = mode->clock;
                wm_low.src_width = mode->crtc_hdisplay;
                wm_low.active_time = mode->crtc_hdisplay * pixel_period;
                wm_low.blank_time = line_time - wm_low.active_time;
                wm_low.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_low.interlaced = true;
                wm_low.vsc = radeon_crtc->vsc;
                wm_low.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_low.vtaps = 2;
                wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_low.lb_size = lb_size;
                wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
                wm_low.num_heads = num_heads;

                /* set for low clocks */
                latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
                    !dce8_check_latency_hiding(&wm_low) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                }
        }

        /* select wm A */
        wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
        tmp = wm_mask;
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(1);
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_a) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* select wm B */
        tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(2);
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_b) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* restore original selection */
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

        /* save values for DPM */
        radeon_crtc->line_time = line_time;
        radeon_crtc->wm_high = latency_watermark_a;
        radeon_crtc->wm_low = latency_watermark_b;
}
7797
7798 /**
7799  * dce8_bandwidth_update - program display watermarks
7800  *
7801  * @rdev: radeon_device pointer
7802  *
7803  * Calculate and program the display watermarks and line
7804  * buffer allocation (CIK).
7805  */
7806 void dce8_bandwidth_update(struct radeon_device *rdev)
7807 {
7808         struct drm_display_mode *mode = NULL;
7809         u32 num_heads = 0, lb_size;
7810         int i;
7811
7812         radeon_update_display_priority(rdev);
7813
7814         for (i = 0; i < rdev->num_crtc; i++) {
7815                 if (rdev->mode_info.crtcs[i]->base.enabled)
7816                         num_heads++;
7817         }
7818         for (i = 0; i < rdev->num_crtc; i++) {
7819                 mode = &rdev->mode_info.crtcs[i]->base.mode;
7820                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
7821                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
7822         }
7823 }
7824
7825 /**
7826  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
7827  *
7828  * @rdev: radeon_device pointer
7829  *
7830  * Fetches a GPU clock counter snapshot (SI).
7831  * Returns the 64 bit clock counter snapshot.
7832  */
7833 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
7834 {
7835         uint64_t clock;
7836
7837         mutex_lock(&rdev->gpu_clock_mutex);
7838         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7839         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7840                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7841         mutex_unlock(&rdev->gpu_clock_mutex);
7842         return clock;
7843 }
7844
/* Program one UVD clock (vclk or dclk) via its SMC control register.
 * Asks atombios for the divider setup for the requested clock, writes the
 * post divider, then polls the status register until the clock reports
 * stable.  Returns 0 on success, -ETIMEDOUT if the clock never settles,
 * or the atombios error code. */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
                              u32 cntl_reg, u32 status_reg)
{
        int r, i;
        struct atom_clock_dividers dividers;
        uint32_t tmp;

        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
                                           clock, false, &dividers);
        if (r)
                return r;

        /* replace the divider bits, keep everything else intact */
        tmp = RREG32_SMC(cntl_reg);
        tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
        tmp |= dividers.post_divider;
        WREG32_SMC(cntl_reg, tmp);

        /* wait up to 100 * 10ms for the clock to report stable */
        for (i = 0; i < 100; i++) {
                if (RREG32_SMC(status_reg) & DCLK_STATUS)
                        break;
                mdelay(10);
        }
        if (i == 100)
                return -ETIMEDOUT;

        return 0;
}
7872
7873 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7874 {
7875         int r = 0;
7876
7877         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
7878         if (r)
7879                 return r;
7880
7881         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
7882         return r;
7883 }
7884
/* Bring the PCIe link up to the highest speed (gen2/gen3) supported by
 * both the GPU and the root port.  No-op for IGPs, non-PCIE parts, when
 * disabled via radeon.pcie_gen2=0, or when the target speed is already
 * active. */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
        struct pci_dev *root = rdev->pdev->bus->self;
        int bridge_pos, gpu_pos;
        u32 speed_cntl, mask, current_data_rate;
        int ret, i;
        u16 tmp16;

        if (radeon_pcie_gen2 == 0)
                return;

        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
        if (ret != 0)
                return;

        if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
                return;

        /* data rate: 0 = gen1, 1 = gen2, 2 = gen3 */
        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
                LC_CURRENT_DATA_RATE_SHIFT;
        if (mask & DRM_PCIE_SPEED_80) {
                if (current_data_rate == 2) {
                        DRM_INFO("PCIE gen 3 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
        } else if (mask & DRM_PCIE_SPEED_50) {
                if (current_data_rate == 1) {
                        DRM_INFO("PCIE gen 2 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
        }

        /* need the PCIe capability offsets of both ends of the link */
        bridge_pos = pci_pcie_cap(root);
        if (!bridge_pos)
                return;

        gpu_pos = pci_pcie_cap(rdev->pdev);
        if (!gpu_pos)
                return;

        if (mask & DRM_PCIE_SPEED_80) {
                /* re-try equalization if gen3 is not already enabled */
                if (current_data_rate != 2) {
                        u16 bridge_cfg, gpu_cfg;
                        u16 bridge_cfg2, gpu_cfg2;
                        u32 max_lw, current_lw, tmp;

                        /* save link control on both ends, then force
                         * hardware autonomous width disable during the
                         * equalization retries */
                        pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                        tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                        tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                        tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
                        current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

                        /* renegotiate back up to the detected max width if
                         * the link trained narrower than it could be */
                        if (current_lw < max_lw) {
                                tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                                if (tmp & LC_RENEGOTIATION_SUPPORT) {
                                        tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
                                        tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
                                        tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
                                        WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
                                }
                        }

                        /* up to 10 equalization retries */
                        for (i = 0; i < 10; i++) {
                                /* check status */
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
                                if (tmp16 & PCI_EXP_DEVSTA_TRPND)
                                        break;

                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

                                /* quiesce the link, then redo equalization */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_REDO_EQ;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                mdelay(100);

                                /* linkctl */
                                /* restore the saved HAWD bit on both ends */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                                /* linkctl2 */
                                /* restore bit 4 (enter compliance) and bits 9-11
                                 * (transmit margin) saved earlier -- presumably
                                 * these magic masks match PCI_EXP_LNKCTL2 fields;
                                 * see the PCIe spec for Link Control 2 */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

                                /* release the quiesce */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp &= ~LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
                        }
                }
        }

        /* set the link speed */
        speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
        speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        /* program the target link speed (LNKCTL2 low nibble) */
        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
        tmp16 &= ~0xf;
        if (mask & DRM_PCIE_SPEED_80)
                tmp16 |= 3; /* gen3 */
        else if (mask & DRM_PCIE_SPEED_50)
                tmp16 |= 2; /* gen2 */
        else
                tmp16 |= 1; /* gen1 */
        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

        /* kick off the speed change and wait for it to complete */
        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        for (i = 0; i < rdev->usec_timeout; i++) {
                speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
                if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
                        break;
                udelay(1);
        }
}
8041
8042 static void cik_program_aspm(struct radeon_device *rdev)
8043 {
8044         u32 data, orig;
8045         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8046         bool disable_clkreq = false;
8047
8048         if (radeon_aspm == 0)
8049                 return;
8050
8051         /* XXX double check IGPs */
8052         if (rdev->flags & RADEON_IS_IGP)
8053                 return;
8054
8055         if (!(rdev->flags & RADEON_IS_PCIE))
8056                 return;
8057
8058         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8059         data &= ~LC_XMIT_N_FTS_MASK;
8060         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8061         if (orig != data)
8062                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8063
8064         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8065         data |= LC_GO_TO_RECOVERY;
8066         if (orig != data)
8067                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8068
8069         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8070         data |= P_IGNORE_EDB_ERR;
8071         if (orig != data)
8072                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8073
8074         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8075         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8076         data |= LC_PMI_TO_L1_DIS;
8077         if (!disable_l0s)
8078                 data |= LC_L0S_INACTIVITY(7);
8079
8080         if (!disable_l1) {
8081                 data |= LC_L1_INACTIVITY(7);
8082                 data &= ~LC_PMI_TO_L1_DIS;
8083                 if (orig != data)
8084                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8085
8086                 if (!disable_plloff_in_l1) {
8087                         bool clk_req_support;
8088
8089                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8090                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8091                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8092                         if (orig != data)
8093                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8094
8095                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8096                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8097                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8098                         if (orig != data)
8099                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8100
8101                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8102                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8103                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8104                         if (orig != data)
8105                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8106
8107                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8108                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8109                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8110                         if (orig != data)
8111                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8112
8113                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8114                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8115                         data |= LC_DYN_LANES_PWR_STATE(3);
8116                         if (orig != data)
8117                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8118
8119                         if (!disable_clkreq) {
8120                                 struct pci_dev *root = rdev->pdev->bus->self;
8121                                 u32 lnkcap;
8122
8123                                 clk_req_support = false;
8124                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8125                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8126                                         clk_req_support = true;
8127                         } else {
8128                                 clk_req_support = false;
8129                         }
8130
8131                         if (clk_req_support) {
8132                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8133                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8134                                 if (orig != data)
8135                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8136
8137                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
8138                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8139                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8140                                 if (orig != data)
8141                                         WREG32_SMC(THM_CLK_CNTL, data);
8142
8143                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8144                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8145                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8146                                 if (orig != data)
8147                                         WREG32_SMC(MISC_CLK_CTRL, data);
8148
8149                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8150                                 data &= ~BCLK_AS_XCLK;
8151                                 if (orig != data)
8152                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
8153
8154                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8155                                 data &= ~FORCE_BIF_REFCLK_EN;
8156                                 if (orig != data)
8157                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8158
8159                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8160                                 data &= ~MPLL_CLKOUT_SEL_MASK;
8161                                 data |= MPLL_CLKOUT_SEL(4);
8162                                 if (orig != data)
8163                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8164                         }
8165                 }
8166         } else {
8167                 if (orig != data)
8168                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8169         }
8170
8171         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8172         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8173         if (orig != data)
8174                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8175
8176         if (!disable_l0s) {
8177                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8178                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8179                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8180                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8181                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8182                                 data &= ~LC_L0S_INACTIVITY_MASK;
8183                                 if (orig != data)
8184                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8185                         }
8186                 }
8187         }
8188 }