2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
70 extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
73 uint64_t addr, unsigned count,
74 uint32_t incr, uint32_t flags);
75 static void cik_rlc_stop(struct radeon_device *rdev);
76 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
77 static void cik_program_aspm(struct radeon_device *rdev);
78 static void cik_init_pg(struct radeon_device *rdev);
79 static void cik_init_cg(struct radeon_device *rdev);
81 /* get temperature in millidegrees */
82 int ci_get_temp(struct radeon_device *rdev)
87 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> CTF_TEMP_SHIFT;
93 actual_temp = temp & 0x1ff;
95 actual_temp = actual_temp * 1000;
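	/*
	 * Worked example: a raw CTF_TEMP field of 0x3c (60 degrees C)
	 * is returned as 60000 millidegrees.
	 */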
100 /* get temperature in millidegrees */
101 int kv_get_temp(struct radeon_device *rdev)
106 temp = RREG32_SMC(0xC0300E0C);
109 actual_temp = (temp / 8) - 49;
113 actual_temp = actual_temp * 1000;
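	/*
	 * Worked example: a raw reading of 520 gives (520 / 8) - 49 = 16
	 * degrees C, returned as 16000 millidegrees.
	 */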
119  * Indirect register accessors
121 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
125 WREG32(PCIE_INDEX, reg);
126 (void)RREG32(PCIE_INDEX);
127 r = RREG32(PCIE_DATA);
131 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
133 WREG32(PCIE_INDEX, reg);
134 (void)RREG32(PCIE_INDEX);
135 WREG32(PCIE_DATA, v);
136 (void)RREG32(PCIE_DATA);
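/*
 * RLC save/restore register lists.
 *
 * Each entry below appears to pack an instance/broadcast selector into the
 * upper 16 bits and a dword register offset into the lower 16 bits (hence
 * the ">> 2" applied to the byte offsets).  The lists are handed to the RLC
 * through the generic sumo_rlc_init() path so this state can be saved and
 * restored around power-gating transitions.
 */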
139 static const u32 spectre_rlc_save_restore_register_list[] =
141 (0x0e00 << 16) | (0xc12c >> 2),
143 (0x0e00 << 16) | (0xc140 >> 2),
145 (0x0e00 << 16) | (0xc150 >> 2),
147 (0x0e00 << 16) | (0xc15c >> 2),
149 (0x0e00 << 16) | (0xc168 >> 2),
151 (0x0e00 << 16) | (0xc170 >> 2),
153 (0x0e00 << 16) | (0xc178 >> 2),
155 (0x0e00 << 16) | (0xc204 >> 2),
157 (0x0e00 << 16) | (0xc2b4 >> 2),
159 (0x0e00 << 16) | (0xc2b8 >> 2),
161 (0x0e00 << 16) | (0xc2bc >> 2),
163 (0x0e00 << 16) | (0xc2c0 >> 2),
165 (0x0e00 << 16) | (0x8228 >> 2),
167 (0x0e00 << 16) | (0x829c >> 2),
169 (0x0e00 << 16) | (0x869c >> 2),
171 (0x0600 << 16) | (0x98f4 >> 2),
173 (0x0e00 << 16) | (0x98f8 >> 2),
175 (0x0e00 << 16) | (0x9900 >> 2),
177 (0x0e00 << 16) | (0xc260 >> 2),
179 (0x0e00 << 16) | (0x90e8 >> 2),
181 (0x0e00 << 16) | (0x3c000 >> 2),
183 (0x0e00 << 16) | (0x3c00c >> 2),
185 (0x0e00 << 16) | (0x8c1c >> 2),
187 (0x0e00 << 16) | (0x9700 >> 2),
189 (0x0e00 << 16) | (0xcd20 >> 2),
191 (0x4e00 << 16) | (0xcd20 >> 2),
193 (0x5e00 << 16) | (0xcd20 >> 2),
195 (0x6e00 << 16) | (0xcd20 >> 2),
197 (0x7e00 << 16) | (0xcd20 >> 2),
199 (0x8e00 << 16) | (0xcd20 >> 2),
201 (0x9e00 << 16) | (0xcd20 >> 2),
203 (0xae00 << 16) | (0xcd20 >> 2),
205 (0xbe00 << 16) | (0xcd20 >> 2),
207 (0x0e00 << 16) | (0x89bc >> 2),
209 (0x0e00 << 16) | (0x8900 >> 2),
212 (0x0e00 << 16) | (0xc130 >> 2),
214 (0x0e00 << 16) | (0xc134 >> 2),
216 (0x0e00 << 16) | (0xc1fc >> 2),
218 (0x0e00 << 16) | (0xc208 >> 2),
220 (0x0e00 << 16) | (0xc264 >> 2),
222 (0x0e00 << 16) | (0xc268 >> 2),
224 (0x0e00 << 16) | (0xc26c >> 2),
226 (0x0e00 << 16) | (0xc270 >> 2),
228 (0x0e00 << 16) | (0xc274 >> 2),
230 (0x0e00 << 16) | (0xc278 >> 2),
232 (0x0e00 << 16) | (0xc27c >> 2),
234 (0x0e00 << 16) | (0xc280 >> 2),
236 (0x0e00 << 16) | (0xc284 >> 2),
238 (0x0e00 << 16) | (0xc288 >> 2),
240 (0x0e00 << 16) | (0xc28c >> 2),
242 (0x0e00 << 16) | (0xc290 >> 2),
244 (0x0e00 << 16) | (0xc294 >> 2),
246 (0x0e00 << 16) | (0xc298 >> 2),
248 (0x0e00 << 16) | (0xc29c >> 2),
250 (0x0e00 << 16) | (0xc2a0 >> 2),
252 (0x0e00 << 16) | (0xc2a4 >> 2),
254 (0x0e00 << 16) | (0xc2a8 >> 2),
256 (0x0e00 << 16) | (0xc2ac >> 2),
258 (0x0e00 << 16) | (0xc2b0 >> 2),
260 (0x0e00 << 16) | (0x301d0 >> 2),
262 (0x0e00 << 16) | (0x30238 >> 2),
264 (0x0e00 << 16) | (0x30250 >> 2),
266 (0x0e00 << 16) | (0x30254 >> 2),
268 (0x0e00 << 16) | (0x30258 >> 2),
270 (0x0e00 << 16) | (0x3025c >> 2),
272 (0x4e00 << 16) | (0xc900 >> 2),
274 (0x5e00 << 16) | (0xc900 >> 2),
276 (0x6e00 << 16) | (0xc900 >> 2),
278 (0x7e00 << 16) | (0xc900 >> 2),
280 (0x8e00 << 16) | (0xc900 >> 2),
282 (0x9e00 << 16) | (0xc900 >> 2),
284 (0xae00 << 16) | (0xc900 >> 2),
286 (0xbe00 << 16) | (0xc900 >> 2),
288 (0x4e00 << 16) | (0xc904 >> 2),
290 (0x5e00 << 16) | (0xc904 >> 2),
292 (0x6e00 << 16) | (0xc904 >> 2),
294 (0x7e00 << 16) | (0xc904 >> 2),
296 (0x8e00 << 16) | (0xc904 >> 2),
298 (0x9e00 << 16) | (0xc904 >> 2),
300 (0xae00 << 16) | (0xc904 >> 2),
302 (0xbe00 << 16) | (0xc904 >> 2),
304 (0x4e00 << 16) | (0xc908 >> 2),
306 (0x5e00 << 16) | (0xc908 >> 2),
308 (0x6e00 << 16) | (0xc908 >> 2),
310 (0x7e00 << 16) | (0xc908 >> 2),
312 (0x8e00 << 16) | (0xc908 >> 2),
314 (0x9e00 << 16) | (0xc908 >> 2),
316 (0xae00 << 16) | (0xc908 >> 2),
318 (0xbe00 << 16) | (0xc908 >> 2),
320 (0x4e00 << 16) | (0xc90c >> 2),
322 (0x5e00 << 16) | (0xc90c >> 2),
324 (0x6e00 << 16) | (0xc90c >> 2),
326 (0x7e00 << 16) | (0xc90c >> 2),
328 (0x8e00 << 16) | (0xc90c >> 2),
330 (0x9e00 << 16) | (0xc90c >> 2),
332 (0xae00 << 16) | (0xc90c >> 2),
334 (0xbe00 << 16) | (0xc90c >> 2),
336 (0x4e00 << 16) | (0xc910 >> 2),
338 (0x5e00 << 16) | (0xc910 >> 2),
340 (0x6e00 << 16) | (0xc910 >> 2),
342 (0x7e00 << 16) | (0xc910 >> 2),
344 (0x8e00 << 16) | (0xc910 >> 2),
346 (0x9e00 << 16) | (0xc910 >> 2),
348 (0xae00 << 16) | (0xc910 >> 2),
350 (0xbe00 << 16) | (0xc910 >> 2),
352 (0x0e00 << 16) | (0xc99c >> 2),
354 (0x0e00 << 16) | (0x9834 >> 2),
356 (0x0000 << 16) | (0x30f00 >> 2),
358 (0x0001 << 16) | (0x30f00 >> 2),
360 (0x0000 << 16) | (0x30f04 >> 2),
362 (0x0001 << 16) | (0x30f04 >> 2),
364 (0x0000 << 16) | (0x30f08 >> 2),
366 (0x0001 << 16) | (0x30f08 >> 2),
368 (0x0000 << 16) | (0x30f0c >> 2),
370 (0x0001 << 16) | (0x30f0c >> 2),
372 (0x0600 << 16) | (0x9b7c >> 2),
374 (0x0e00 << 16) | (0x8a14 >> 2),
376 (0x0e00 << 16) | (0x8a18 >> 2),
378 (0x0600 << 16) | (0x30a00 >> 2),
380 (0x0e00 << 16) | (0x8bf0 >> 2),
382 (0x0e00 << 16) | (0x8bcc >> 2),
384 (0x0e00 << 16) | (0x8b24 >> 2),
386 (0x0e00 << 16) | (0x30a04 >> 2),
388 (0x0600 << 16) | (0x30a10 >> 2),
390 (0x0600 << 16) | (0x30a14 >> 2),
392 (0x0600 << 16) | (0x30a18 >> 2),
394 (0x0600 << 16) | (0x30a2c >> 2),
396 (0x0e00 << 16) | (0xc700 >> 2),
398 (0x0e00 << 16) | (0xc704 >> 2),
400 (0x0e00 << 16) | (0xc708 >> 2),
402 (0x0e00 << 16) | (0xc768 >> 2),
404 (0x0400 << 16) | (0xc770 >> 2),
406 (0x0400 << 16) | (0xc774 >> 2),
408 (0x0400 << 16) | (0xc778 >> 2),
410 (0x0400 << 16) | (0xc77c >> 2),
412 (0x0400 << 16) | (0xc780 >> 2),
414 (0x0400 << 16) | (0xc784 >> 2),
416 (0x0400 << 16) | (0xc788 >> 2),
418 (0x0400 << 16) | (0xc78c >> 2),
420 (0x0400 << 16) | (0xc798 >> 2),
422 (0x0400 << 16) | (0xc79c >> 2),
424 (0x0400 << 16) | (0xc7a0 >> 2),
426 (0x0400 << 16) | (0xc7a4 >> 2),
428 (0x0400 << 16) | (0xc7a8 >> 2),
430 (0x0400 << 16) | (0xc7ac >> 2),
432 (0x0400 << 16) | (0xc7b0 >> 2),
434 (0x0400 << 16) | (0xc7b4 >> 2),
436 (0x0e00 << 16) | (0x9100 >> 2),
438 (0x0e00 << 16) | (0x3c010 >> 2),
440 (0x0e00 << 16) | (0x92a8 >> 2),
442 (0x0e00 << 16) | (0x92ac >> 2),
444 (0x0e00 << 16) | (0x92b4 >> 2),
446 (0x0e00 << 16) | (0x92b8 >> 2),
448 (0x0e00 << 16) | (0x92bc >> 2),
450 (0x0e00 << 16) | (0x92c0 >> 2),
452 (0x0e00 << 16) | (0x92c4 >> 2),
454 (0x0e00 << 16) | (0x92c8 >> 2),
456 (0x0e00 << 16) | (0x92cc >> 2),
458 (0x0e00 << 16) | (0x92d0 >> 2),
460 (0x0e00 << 16) | (0x8c00 >> 2),
462 (0x0e00 << 16) | (0x8c04 >> 2),
464 (0x0e00 << 16) | (0x8c20 >> 2),
466 (0x0e00 << 16) | (0x8c38 >> 2),
468 (0x0e00 << 16) | (0x8c3c >> 2),
470 (0x0e00 << 16) | (0xae00 >> 2),
472 (0x0e00 << 16) | (0x9604 >> 2),
474 (0x0e00 << 16) | (0xac08 >> 2),
476 (0x0e00 << 16) | (0xac0c >> 2),
478 (0x0e00 << 16) | (0xac10 >> 2),
480 (0x0e00 << 16) | (0xac14 >> 2),
482 (0x0e00 << 16) | (0xac58 >> 2),
484 (0x0e00 << 16) | (0xac68 >> 2),
486 (0x0e00 << 16) | (0xac6c >> 2),
488 (0x0e00 << 16) | (0xac70 >> 2),
490 (0x0e00 << 16) | (0xac74 >> 2),
492 (0x0e00 << 16) | (0xac78 >> 2),
494 (0x0e00 << 16) | (0xac7c >> 2),
496 (0x0e00 << 16) | (0xac80 >> 2),
498 (0x0e00 << 16) | (0xac84 >> 2),
500 (0x0e00 << 16) | (0xac88 >> 2),
502 (0x0e00 << 16) | (0xac8c >> 2),
504 (0x0e00 << 16) | (0x970c >> 2),
506 (0x0e00 << 16) | (0x9714 >> 2),
508 (0x0e00 << 16) | (0x9718 >> 2),
510 (0x0e00 << 16) | (0x971c >> 2),
512 (0x0e00 << 16) | (0x31068 >> 2),
514 (0x4e00 << 16) | (0x31068 >> 2),
516 (0x5e00 << 16) | (0x31068 >> 2),
518 (0x6e00 << 16) | (0x31068 >> 2),
520 (0x7e00 << 16) | (0x31068 >> 2),
522 (0x8e00 << 16) | (0x31068 >> 2),
524 (0x9e00 << 16) | (0x31068 >> 2),
526 (0xae00 << 16) | (0x31068 >> 2),
528 (0xbe00 << 16) | (0x31068 >> 2),
530 (0x0e00 << 16) | (0xcd10 >> 2),
532 (0x0e00 << 16) | (0xcd14 >> 2),
534 (0x0e00 << 16) | (0x88b0 >> 2),
536 (0x0e00 << 16) | (0x88b4 >> 2),
538 (0x0e00 << 16) | (0x88b8 >> 2),
540 (0x0e00 << 16) | (0x88bc >> 2),
542 (0x0400 << 16) | (0x89c0 >> 2),
544 (0x0e00 << 16) | (0x88c4 >> 2),
546 (0x0e00 << 16) | (0x88c8 >> 2),
548 (0x0e00 << 16) | (0x88d0 >> 2),
550 (0x0e00 << 16) | (0x88d4 >> 2),
552 (0x0e00 << 16) | (0x88d8 >> 2),
554 (0x0e00 << 16) | (0x8980 >> 2),
556 (0x0e00 << 16) | (0x30938 >> 2),
558 (0x0e00 << 16) | (0x3093c >> 2),
560 (0x0e00 << 16) | (0x30940 >> 2),
562 (0x0e00 << 16) | (0x89a0 >> 2),
564 (0x0e00 << 16) | (0x30900 >> 2),
566 (0x0e00 << 16) | (0x30904 >> 2),
568 (0x0e00 << 16) | (0x89b4 >> 2),
570 (0x0e00 << 16) | (0x3c210 >> 2),
572 (0x0e00 << 16) | (0x3c214 >> 2),
574 (0x0e00 << 16) | (0x3c218 >> 2),
576 (0x0e00 << 16) | (0x8904 >> 2),
579 (0x0e00 << 16) | (0x8c28 >> 2),
580 (0x0e00 << 16) | (0x8c2c >> 2),
581 (0x0e00 << 16) | (0x8c30 >> 2),
582 (0x0e00 << 16) | (0x8c34 >> 2),
583 (0x0e00 << 16) | (0x9600 >> 2),
586 static const u32 kalindi_rlc_save_restore_register_list[] =
588 (0x0e00 << 16) | (0xc12c >> 2),
590 (0x0e00 << 16) | (0xc140 >> 2),
592 (0x0e00 << 16) | (0xc150 >> 2),
594 (0x0e00 << 16) | (0xc15c >> 2),
596 (0x0e00 << 16) | (0xc168 >> 2),
598 (0x0e00 << 16) | (0xc170 >> 2),
600 (0x0e00 << 16) | (0xc204 >> 2),
602 (0x0e00 << 16) | (0xc2b4 >> 2),
604 (0x0e00 << 16) | (0xc2b8 >> 2),
606 (0x0e00 << 16) | (0xc2bc >> 2),
608 (0x0e00 << 16) | (0xc2c0 >> 2),
610 (0x0e00 << 16) | (0x8228 >> 2),
612 (0x0e00 << 16) | (0x829c >> 2),
614 (0x0e00 << 16) | (0x869c >> 2),
616 (0x0600 << 16) | (0x98f4 >> 2),
618 (0x0e00 << 16) | (0x98f8 >> 2),
620 (0x0e00 << 16) | (0x9900 >> 2),
622 (0x0e00 << 16) | (0xc260 >> 2),
624 (0x0e00 << 16) | (0x90e8 >> 2),
626 (0x0e00 << 16) | (0x3c000 >> 2),
628 (0x0e00 << 16) | (0x3c00c >> 2),
630 (0x0e00 << 16) | (0x8c1c >> 2),
632 (0x0e00 << 16) | (0x9700 >> 2),
634 (0x0e00 << 16) | (0xcd20 >> 2),
636 (0x4e00 << 16) | (0xcd20 >> 2),
638 (0x5e00 << 16) | (0xcd20 >> 2),
640 (0x6e00 << 16) | (0xcd20 >> 2),
642 (0x7e00 << 16) | (0xcd20 >> 2),
644 (0x0e00 << 16) | (0x89bc >> 2),
646 (0x0e00 << 16) | (0x8900 >> 2),
649 (0x0e00 << 16) | (0xc130 >> 2),
651 (0x0e00 << 16) | (0xc134 >> 2),
653 (0x0e00 << 16) | (0xc1fc >> 2),
655 (0x0e00 << 16) | (0xc208 >> 2),
657 (0x0e00 << 16) | (0xc264 >> 2),
659 (0x0e00 << 16) | (0xc268 >> 2),
661 (0x0e00 << 16) | (0xc26c >> 2),
663 (0x0e00 << 16) | (0xc270 >> 2),
665 (0x0e00 << 16) | (0xc274 >> 2),
667 (0x0e00 << 16) | (0xc28c >> 2),
669 (0x0e00 << 16) | (0xc290 >> 2),
671 (0x0e00 << 16) | (0xc294 >> 2),
673 (0x0e00 << 16) | (0xc298 >> 2),
675 (0x0e00 << 16) | (0xc2a0 >> 2),
677 (0x0e00 << 16) | (0xc2a4 >> 2),
679 (0x0e00 << 16) | (0xc2a8 >> 2),
681 (0x0e00 << 16) | (0xc2ac >> 2),
683 (0x0e00 << 16) | (0x301d0 >> 2),
685 (0x0e00 << 16) | (0x30238 >> 2),
687 (0x0e00 << 16) | (0x30250 >> 2),
689 (0x0e00 << 16) | (0x30254 >> 2),
691 (0x0e00 << 16) | (0x30258 >> 2),
693 (0x0e00 << 16) | (0x3025c >> 2),
695 (0x4e00 << 16) | (0xc900 >> 2),
697 (0x5e00 << 16) | (0xc900 >> 2),
699 (0x6e00 << 16) | (0xc900 >> 2),
701 (0x7e00 << 16) | (0xc900 >> 2),
703 (0x4e00 << 16) | (0xc904 >> 2),
705 (0x5e00 << 16) | (0xc904 >> 2),
707 (0x6e00 << 16) | (0xc904 >> 2),
709 (0x7e00 << 16) | (0xc904 >> 2),
711 (0x4e00 << 16) | (0xc908 >> 2),
713 (0x5e00 << 16) | (0xc908 >> 2),
715 (0x6e00 << 16) | (0xc908 >> 2),
717 (0x7e00 << 16) | (0xc908 >> 2),
719 (0x4e00 << 16) | (0xc90c >> 2),
721 (0x5e00 << 16) | (0xc90c >> 2),
723 (0x6e00 << 16) | (0xc90c >> 2),
725 (0x7e00 << 16) | (0xc90c >> 2),
727 (0x4e00 << 16) | (0xc910 >> 2),
729 (0x5e00 << 16) | (0xc910 >> 2),
731 (0x6e00 << 16) | (0xc910 >> 2),
733 (0x7e00 << 16) | (0xc910 >> 2),
735 (0x0e00 << 16) | (0xc99c >> 2),
737 (0x0e00 << 16) | (0x9834 >> 2),
739 (0x0000 << 16) | (0x30f00 >> 2),
741 (0x0000 << 16) | (0x30f04 >> 2),
743 (0x0000 << 16) | (0x30f08 >> 2),
745 (0x0000 << 16) | (0x30f0c >> 2),
747 (0x0600 << 16) | (0x9b7c >> 2),
749 (0x0e00 << 16) | (0x8a14 >> 2),
751 (0x0e00 << 16) | (0x8a18 >> 2),
753 (0x0600 << 16) | (0x30a00 >> 2),
755 (0x0e00 << 16) | (0x8bf0 >> 2),
757 (0x0e00 << 16) | (0x8bcc >> 2),
759 (0x0e00 << 16) | (0x8b24 >> 2),
761 (0x0e00 << 16) | (0x30a04 >> 2),
763 (0x0600 << 16) | (0x30a10 >> 2),
765 (0x0600 << 16) | (0x30a14 >> 2),
767 (0x0600 << 16) | (0x30a18 >> 2),
769 (0x0600 << 16) | (0x30a2c >> 2),
771 (0x0e00 << 16) | (0xc700 >> 2),
773 (0x0e00 << 16) | (0xc704 >> 2),
775 (0x0e00 << 16) | (0xc708 >> 2),
777 (0x0e00 << 16) | (0xc768 >> 2),
779 (0x0400 << 16) | (0xc770 >> 2),
781 (0x0400 << 16) | (0xc774 >> 2),
783 (0x0400 << 16) | (0xc798 >> 2),
785 (0x0400 << 16) | (0xc79c >> 2),
787 (0x0e00 << 16) | (0x9100 >> 2),
789 (0x0e00 << 16) | (0x3c010 >> 2),
791 (0x0e00 << 16) | (0x8c00 >> 2),
793 (0x0e00 << 16) | (0x8c04 >> 2),
795 (0x0e00 << 16) | (0x8c20 >> 2),
797 (0x0e00 << 16) | (0x8c38 >> 2),
799 (0x0e00 << 16) | (0x8c3c >> 2),
801 (0x0e00 << 16) | (0xae00 >> 2),
803 (0x0e00 << 16) | (0x9604 >> 2),
805 (0x0e00 << 16) | (0xac08 >> 2),
807 (0x0e00 << 16) | (0xac0c >> 2),
809 (0x0e00 << 16) | (0xac10 >> 2),
811 (0x0e00 << 16) | (0xac14 >> 2),
813 (0x0e00 << 16) | (0xac58 >> 2),
815 (0x0e00 << 16) | (0xac68 >> 2),
817 (0x0e00 << 16) | (0xac6c >> 2),
819 (0x0e00 << 16) | (0xac70 >> 2),
821 (0x0e00 << 16) | (0xac74 >> 2),
823 (0x0e00 << 16) | (0xac78 >> 2),
825 (0x0e00 << 16) | (0xac7c >> 2),
827 (0x0e00 << 16) | (0xac80 >> 2),
829 (0x0e00 << 16) | (0xac84 >> 2),
831 (0x0e00 << 16) | (0xac88 >> 2),
833 (0x0e00 << 16) | (0xac8c >> 2),
835 (0x0e00 << 16) | (0x970c >> 2),
837 (0x0e00 << 16) | (0x9714 >> 2),
839 (0x0e00 << 16) | (0x9718 >> 2),
841 (0x0e00 << 16) | (0x971c >> 2),
843 (0x0e00 << 16) | (0x31068 >> 2),
845 (0x4e00 << 16) | (0x31068 >> 2),
847 (0x5e00 << 16) | (0x31068 >> 2),
849 (0x6e00 << 16) | (0x31068 >> 2),
851 (0x7e00 << 16) | (0x31068 >> 2),
853 (0x0e00 << 16) | (0xcd10 >> 2),
855 (0x0e00 << 16) | (0xcd14 >> 2),
857 (0x0e00 << 16) | (0x88b0 >> 2),
859 (0x0e00 << 16) | (0x88b4 >> 2),
861 (0x0e00 << 16) | (0x88b8 >> 2),
863 (0x0e00 << 16) | (0x88bc >> 2),
865 (0x0400 << 16) | (0x89c0 >> 2),
867 (0x0e00 << 16) | (0x88c4 >> 2),
869 (0x0e00 << 16) | (0x88c8 >> 2),
871 (0x0e00 << 16) | (0x88d0 >> 2),
873 (0x0e00 << 16) | (0x88d4 >> 2),
875 (0x0e00 << 16) | (0x88d8 >> 2),
877 (0x0e00 << 16) | (0x8980 >> 2),
879 (0x0e00 << 16) | (0x30938 >> 2),
881 (0x0e00 << 16) | (0x3093c >> 2),
883 (0x0e00 << 16) | (0x30940 >> 2),
885 (0x0e00 << 16) | (0x89a0 >> 2),
887 (0x0e00 << 16) | (0x30900 >> 2),
889 (0x0e00 << 16) | (0x30904 >> 2),
891 (0x0e00 << 16) | (0x89b4 >> 2),
893 (0x0e00 << 16) | (0x3e1fc >> 2),
895 (0x0e00 << 16) | (0x3c210 >> 2),
897 (0x0e00 << 16) | (0x3c214 >> 2),
899 (0x0e00 << 16) | (0x3c218 >> 2),
901 (0x0e00 << 16) | (0x8904 >> 2),
904 (0x0e00 << 16) | (0x8c28 >> 2),
905 (0x0e00 << 16) | (0x8c2c >> 2),
906 (0x0e00 << 16) | (0x8c30 >> 2),
907 (0x0e00 << 16) | (0x8c34 >> 2),
908 (0x0e00 << 16) | (0x9600 >> 2),
911 static const u32 bonaire_golden_spm_registers[] =
913 0x30800, 0xe0ffffff, 0xe0000000
916 static const u32 bonaire_golden_common_registers[] =
918 0xc770, 0xffffffff, 0x00000800,
919 0xc774, 0xffffffff, 0x00000800,
920 0xc798, 0xffffffff, 0x00007fbf,
921 0xc79c, 0xffffffff, 0x00007faf
924 static const u32 bonaire_golden_registers[] =
926 0x3354, 0x00000333, 0x00000333,
927 0x3350, 0x000c0fc0, 0x00040200,
928 0x9a10, 0x00010000, 0x00058208,
929 0x3c000, 0xffff1fff, 0x00140000,
930 0x3c200, 0xfdfc0fff, 0x00000100,
931 0x3c234, 0x40000000, 0x40000200,
932 0x9830, 0xffffffff, 0x00000000,
933 0x9834, 0xf00fffff, 0x00000400,
934 0x9838, 0x0002021c, 0x00020200,
935 0xc78, 0x00000080, 0x00000000,
936 0x5bb0, 0x000000f0, 0x00000070,
937 0x5bc0, 0xf0311fff, 0x80300000,
938 0x98f8, 0x73773777, 0x12010001,
939 0x350c, 0x00810000, 0x408af000,
940 0x7030, 0x31000111, 0x00000011,
941 0x2f48, 0x73773777, 0x12010001,
942 0x220c, 0x00007fb6, 0x0021a1b1,
943 0x2210, 0x00007fb6, 0x002021b1,
944 0x2180, 0x00007fb6, 0x00002191,
945 0x2218, 0x00007fb6, 0x002121b1,
946 0x221c, 0x00007fb6, 0x002021b1,
947 0x21dc, 0x00007fb6, 0x00002191,
948 0x21e0, 0x00007fb6, 0x00002191,
949 0x3628, 0x0000003f, 0x0000000a,
950 0x362c, 0x0000003f, 0x0000000a,
951 0x2ae4, 0x00073ffe, 0x000022a2,
952 0x240c, 0x000007ff, 0x00000000,
953 0x8a14, 0xf000003f, 0x00000007,
954 0x8bf0, 0x00002001, 0x00000001,
955 0x8b24, 0xffffffff, 0x00ffffff,
956 0x30a04, 0x0000ff0f, 0x00000000,
957 0x28a4c, 0x07ffffff, 0x06000000,
958 0x4d8, 0x00000fff, 0x00000100,
959 0x3e78, 0x00000001, 0x00000002,
960 0x9100, 0x03000000, 0x0362c688,
961 0x8c00, 0x000000ff, 0x00000001,
962 0xe40, 0x00001fff, 0x00001fff,
963 0x9060, 0x0000007f, 0x00000020,
964 0x9508, 0x00010000, 0x00010000,
965 0xac14, 0x000003ff, 0x000000f3,
966 0xac0c, 0xffffffff, 0x00001032
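/*
 * The *_mgcg_cgcg_init tables program the medium-grain and coarse-grain
 * clock-gating defaults; cik_init_golden_registers() below applies them
 * before the per-ASIC golden register fixups.
 */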
969 static const u32 bonaire_mgcg_cgcg_init[] =
971 0xc420, 0xffffffff, 0xfffffffc,
972 0x30800, 0xffffffff, 0xe0000000,
973 0x3c2a0, 0xffffffff, 0x00000100,
974 0x3c208, 0xffffffff, 0x00000100,
975 0x3c2c0, 0xffffffff, 0xc0000100,
976 0x3c2c8, 0xffffffff, 0xc0000100,
977 0x3c2c4, 0xffffffff, 0xc0000100,
978 0x55e4, 0xffffffff, 0x00600100,
979 0x3c280, 0xffffffff, 0x00000100,
980 0x3c214, 0xffffffff, 0x06000100,
981 0x3c220, 0xffffffff, 0x00000100,
982 0x3c218, 0xffffffff, 0x06000100,
983 0x3c204, 0xffffffff, 0x00000100,
984 0x3c2e0, 0xffffffff, 0x00000100,
985 0x3c224, 0xffffffff, 0x00000100,
986 0x3c200, 0xffffffff, 0x00000100,
987 0x3c230, 0xffffffff, 0x00000100,
988 0x3c234, 0xffffffff, 0x00000100,
989 0x3c250, 0xffffffff, 0x00000100,
990 0x3c254, 0xffffffff, 0x00000100,
991 0x3c258, 0xffffffff, 0x00000100,
992 0x3c25c, 0xffffffff, 0x00000100,
993 0x3c260, 0xffffffff, 0x00000100,
994 0x3c27c, 0xffffffff, 0x00000100,
995 0x3c278, 0xffffffff, 0x00000100,
996 0x3c210, 0xffffffff, 0x06000100,
997 0x3c290, 0xffffffff, 0x00000100,
998 0x3c274, 0xffffffff, 0x00000100,
999 0x3c2b4, 0xffffffff, 0x00000100,
1000 0x3c2b0, 0xffffffff, 0x00000100,
1001 0x3c270, 0xffffffff, 0x00000100,
1002 0x30800, 0xffffffff, 0xe0000000,
1003 0x3c020, 0xffffffff, 0x00010000,
1004 0x3c024, 0xffffffff, 0x00030002,
1005 0x3c028, 0xffffffff, 0x00040007,
1006 0x3c02c, 0xffffffff, 0x00060005,
1007 0x3c030, 0xffffffff, 0x00090008,
1008 0x3c034, 0xffffffff, 0x00010000,
1009 0x3c038, 0xffffffff, 0x00030002,
1010 0x3c03c, 0xffffffff, 0x00040007,
1011 0x3c040, 0xffffffff, 0x00060005,
1012 0x3c044, 0xffffffff, 0x00090008,
1013 0x3c048, 0xffffffff, 0x00010000,
1014 0x3c04c, 0xffffffff, 0x00030002,
1015 0x3c050, 0xffffffff, 0x00040007,
1016 0x3c054, 0xffffffff, 0x00060005,
1017 0x3c058, 0xffffffff, 0x00090008,
1018 0x3c05c, 0xffffffff, 0x00010000,
1019 0x3c060, 0xffffffff, 0x00030002,
1020 0x3c064, 0xffffffff, 0x00040007,
1021 0x3c068, 0xffffffff, 0x00060005,
1022 0x3c06c, 0xffffffff, 0x00090008,
1023 0x3c070, 0xffffffff, 0x00010000,
1024 0x3c074, 0xffffffff, 0x00030002,
1025 0x3c078, 0xffffffff, 0x00040007,
1026 0x3c07c, 0xffffffff, 0x00060005,
1027 0x3c080, 0xffffffff, 0x00090008,
1028 0x3c084, 0xffffffff, 0x00010000,
1029 0x3c088, 0xffffffff, 0x00030002,
1030 0x3c08c, 0xffffffff, 0x00040007,
1031 0x3c090, 0xffffffff, 0x00060005,
1032 0x3c094, 0xffffffff, 0x00090008,
1033 0x3c098, 0xffffffff, 0x00010000,
1034 0x3c09c, 0xffffffff, 0x00030002,
1035 0x3c0a0, 0xffffffff, 0x00040007,
1036 0x3c0a4, 0xffffffff, 0x00060005,
1037 0x3c0a8, 0xffffffff, 0x00090008,
1038 0x3c000, 0xffffffff, 0x96e00200,
1039 0x8708, 0xffffffff, 0x00900100,
1040 0xc424, 0xffffffff, 0x0020003f,
1041 0x38, 0xffffffff, 0x0140001c,
1042 0x3c, 0x000f0000, 0x000f0000,
1043 0x220, 0xffffffff, 0xC060000C,
1044 0x224, 0xc0000fff, 0x00000100,
1045 0xf90, 0xffffffff, 0x00000100,
1046 0xf98, 0x00000101, 0x00000000,
1047 0x20a8, 0xffffffff, 0x00000104,
1048 0x55e4, 0xff000fff, 0x00000100,
1049 0x30cc, 0xc0000fff, 0x00000104,
1050 0xc1e4, 0x00000001, 0x00000001,
1051 0xd00c, 0xff000ff0, 0x00000100,
1052 0xd80c, 0xff000ff0, 0x00000100
1055 static const u32 spectre_golden_spm_registers[] =
1057 0x30800, 0xe0ffffff, 0xe0000000
1060 static const u32 spectre_golden_common_registers[] =
1062 0xc770, 0xffffffff, 0x00000800,
1063 0xc774, 0xffffffff, 0x00000800,
1064 0xc798, 0xffffffff, 0x00007fbf,
1065 0xc79c, 0xffffffff, 0x00007faf
1068 static const u32 spectre_golden_registers[] =
1070 0x3c000, 0xffff1fff, 0x96940200,
1071 0x3c00c, 0xffff0001, 0xff000000,
1072 0x3c200, 0xfffc0fff, 0x00000100,
1073 0x6ed8, 0x00010101, 0x00010000,
1074 0x9834, 0xf00fffff, 0x00000400,
1075 0x9838, 0xfffffffc, 0x00020200,
1076 0x5bb0, 0x000000f0, 0x00000070,
1077 0x5bc0, 0xf0311fff, 0x80300000,
1078 0x98f8, 0x73773777, 0x12010001,
1079 0x9b7c, 0x00ff0000, 0x00fc0000,
1080 0x2f48, 0x73773777, 0x12010001,
1081 0x8a14, 0xf000003f, 0x00000007,
1082 0x8b24, 0xffffffff, 0x00ffffff,
1083 0x28350, 0x3f3f3fff, 0x00000082,
1084 0x28355, 0x0000003f, 0x00000000,
1085 0x3e78, 0x00000001, 0x00000002,
1086 0x913c, 0xffff03df, 0x00000004,
1087 0xc768, 0x00000008, 0x00000008,
1088 0x8c00, 0x000008ff, 0x00000800,
1089 0x9508, 0x00010000, 0x00010000,
1090 0xac0c, 0xffffffff, 0x54763210,
1091 0x214f8, 0x01ff01ff, 0x00000002,
1092 0x21498, 0x007ff800, 0x00200000,
1093 0x2015c, 0xffffffff, 0x00000f40,
1094 0x30934, 0xffffffff, 0x00000001
1097 static const u32 spectre_mgcg_cgcg_init[] =
1099 0xc420, 0xffffffff, 0xfffffffc,
1100 0x30800, 0xffffffff, 0xe0000000,
1101 0x3c2a0, 0xffffffff, 0x00000100,
1102 0x3c208, 0xffffffff, 0x00000100,
1103 0x3c2c0, 0xffffffff, 0x00000100,
1104 0x3c2c8, 0xffffffff, 0x00000100,
1105 0x3c2c4, 0xffffffff, 0x00000100,
1106 0x55e4, 0xffffffff, 0x00600100,
1107 0x3c280, 0xffffffff, 0x00000100,
1108 0x3c214, 0xffffffff, 0x06000100,
1109 0x3c220, 0xffffffff, 0x00000100,
1110 0x3c218, 0xffffffff, 0x06000100,
1111 0x3c204, 0xffffffff, 0x00000100,
1112 0x3c2e0, 0xffffffff, 0x00000100,
1113 0x3c224, 0xffffffff, 0x00000100,
1114 0x3c200, 0xffffffff, 0x00000100,
1115 0x3c230, 0xffffffff, 0x00000100,
1116 0x3c234, 0xffffffff, 0x00000100,
1117 0x3c250, 0xffffffff, 0x00000100,
1118 0x3c254, 0xffffffff, 0x00000100,
1119 0x3c258, 0xffffffff, 0x00000100,
1120 0x3c25c, 0xffffffff, 0x00000100,
1121 0x3c260, 0xffffffff, 0x00000100,
1122 0x3c27c, 0xffffffff, 0x00000100,
1123 0x3c278, 0xffffffff, 0x00000100,
1124 0x3c210, 0xffffffff, 0x06000100,
1125 0x3c290, 0xffffffff, 0x00000100,
1126 0x3c274, 0xffffffff, 0x00000100,
1127 0x3c2b4, 0xffffffff, 0x00000100,
1128 0x3c2b0, 0xffffffff, 0x00000100,
1129 0x3c270, 0xffffffff, 0x00000100,
1130 0x30800, 0xffffffff, 0xe0000000,
1131 0x3c020, 0xffffffff, 0x00010000,
1132 0x3c024, 0xffffffff, 0x00030002,
1133 0x3c028, 0xffffffff, 0x00040007,
1134 0x3c02c, 0xffffffff, 0x00060005,
1135 0x3c030, 0xffffffff, 0x00090008,
1136 0x3c034, 0xffffffff, 0x00010000,
1137 0x3c038, 0xffffffff, 0x00030002,
1138 0x3c03c, 0xffffffff, 0x00040007,
1139 0x3c040, 0xffffffff, 0x00060005,
1140 0x3c044, 0xffffffff, 0x00090008,
1141 0x3c048, 0xffffffff, 0x00010000,
1142 0x3c04c, 0xffffffff, 0x00030002,
1143 0x3c050, 0xffffffff, 0x00040007,
1144 0x3c054, 0xffffffff, 0x00060005,
1145 0x3c058, 0xffffffff, 0x00090008,
1146 0x3c05c, 0xffffffff, 0x00010000,
1147 0x3c060, 0xffffffff, 0x00030002,
1148 0x3c064, 0xffffffff, 0x00040007,
1149 0x3c068, 0xffffffff, 0x00060005,
1150 0x3c06c, 0xffffffff, 0x00090008,
1151 0x3c070, 0xffffffff, 0x00010000,
1152 0x3c074, 0xffffffff, 0x00030002,
1153 0x3c078, 0xffffffff, 0x00040007,
1154 0x3c07c, 0xffffffff, 0x00060005,
1155 0x3c080, 0xffffffff, 0x00090008,
1156 0x3c084, 0xffffffff, 0x00010000,
1157 0x3c088, 0xffffffff, 0x00030002,
1158 0x3c08c, 0xffffffff, 0x00040007,
1159 0x3c090, 0xffffffff, 0x00060005,
1160 0x3c094, 0xffffffff, 0x00090008,
1161 0x3c098, 0xffffffff, 0x00010000,
1162 0x3c09c, 0xffffffff, 0x00030002,
1163 0x3c0a0, 0xffffffff, 0x00040007,
1164 0x3c0a4, 0xffffffff, 0x00060005,
1165 0x3c0a8, 0xffffffff, 0x00090008,
1166 0x3c0ac, 0xffffffff, 0x00010000,
1167 0x3c0b0, 0xffffffff, 0x00030002,
1168 0x3c0b4, 0xffffffff, 0x00040007,
1169 0x3c0b8, 0xffffffff, 0x00060005,
1170 0x3c0bc, 0xffffffff, 0x00090008,
1171 0x3c000, 0xffffffff, 0x96e00200,
1172 0x8708, 0xffffffff, 0x00900100,
1173 0xc424, 0xffffffff, 0x0020003f,
1174 0x38, 0xffffffff, 0x0140001c,
1175 0x3c, 0x000f0000, 0x000f0000,
1176 0x220, 0xffffffff, 0xC060000C,
1177 0x224, 0xc0000fff, 0x00000100,
1178 0xf90, 0xffffffff, 0x00000100,
1179 0xf98, 0x00000101, 0x00000000,
1180 0x20a8, 0xffffffff, 0x00000104,
1181 0x55e4, 0xff000fff, 0x00000100,
1182 0x30cc, 0xc0000fff, 0x00000104,
1183 0xc1e4, 0x00000001, 0x00000001,
1184 0xd00c, 0xff000ff0, 0x00000100,
1185 0xd80c, 0xff000ff0, 0x00000100
1188 static const u32 kalindi_golden_spm_registers[] =
1190 0x30800, 0xe0ffffff, 0xe0000000
1193 static const u32 kalindi_golden_common_registers[] =
1195 0xc770, 0xffffffff, 0x00000800,
1196 0xc774, 0xffffffff, 0x00000800,
1197 0xc798, 0xffffffff, 0x00007fbf,
1198 0xc79c, 0xffffffff, 0x00007faf
1201 static const u32 kalindi_golden_registers[] =
1203 0x3c000, 0xffffdfff, 0x6e944040,
1204 0x55e4, 0xff607fff, 0xfc000100,
1205 0x3c220, 0xff000fff, 0x00000100,
1206 0x3c224, 0xff000fff, 0x00000100,
1207 0x3c200, 0xfffc0fff, 0x00000100,
1208 0x6ed8, 0x00010101, 0x00010000,
1209 0x9830, 0xffffffff, 0x00000000,
1210 0x9834, 0xf00fffff, 0x00000400,
1211 0x5bb0, 0x000000f0, 0x00000070,
1212 0x5bc0, 0xf0311fff, 0x80300000,
1213 0x98f8, 0x73773777, 0x12010001,
1214 0x98fc, 0xffffffff, 0x00000010,
1215 0x9b7c, 0x00ff0000, 0x00fc0000,
1216 0x8030, 0x00001f0f, 0x0000100a,
1217 0x2f48, 0x73773777, 0x12010001,
1218 0x2408, 0x000fffff, 0x000c007f,
1219 0x8a14, 0xf000003f, 0x00000007,
1220 0x8b24, 0x3fff3fff, 0x00ffcfff,
1221 0x30a04, 0x0000ff0f, 0x00000000,
1222 0x28a4c, 0x07ffffff, 0x06000000,
1223 0x4d8, 0x00000fff, 0x00000100,
1224 0x3e78, 0x00000001, 0x00000002,
1225 0xc768, 0x00000008, 0x00000008,
1226 0x8c00, 0x000000ff, 0x00000003,
1227 0x214f8, 0x01ff01ff, 0x00000002,
1228 0x21498, 0x007ff800, 0x00200000,
1229 0x2015c, 0xffffffff, 0x00000f40,
1230 0x88c4, 0x001f3ae3, 0x00000082,
1231 0x88d4, 0x0000001f, 0x00000010,
1232 0x30934, 0xffffffff, 0x00000000
1235 static const u32 kalindi_mgcg_cgcg_init[] =
1237 0xc420, 0xffffffff, 0xfffffffc,
1238 0x30800, 0xffffffff, 0xe0000000,
1239 0x3c2a0, 0xffffffff, 0x00000100,
1240 0x3c208, 0xffffffff, 0x00000100,
1241 0x3c2c0, 0xffffffff, 0x00000100,
1242 0x3c2c8, 0xffffffff, 0x00000100,
1243 0x3c2c4, 0xffffffff, 0x00000100,
1244 0x55e4, 0xffffffff, 0x00600100,
1245 0x3c280, 0xffffffff, 0x00000100,
1246 0x3c214, 0xffffffff, 0x06000100,
1247 0x3c220, 0xffffffff, 0x00000100,
1248 0x3c218, 0xffffffff, 0x06000100,
1249 0x3c204, 0xffffffff, 0x00000100,
1250 0x3c2e0, 0xffffffff, 0x00000100,
1251 0x3c224, 0xffffffff, 0x00000100,
1252 0x3c200, 0xffffffff, 0x00000100,
1253 0x3c230, 0xffffffff, 0x00000100,
1254 0x3c234, 0xffffffff, 0x00000100,
1255 0x3c250, 0xffffffff, 0x00000100,
1256 0x3c254, 0xffffffff, 0x00000100,
1257 0x3c258, 0xffffffff, 0x00000100,
1258 0x3c25c, 0xffffffff, 0x00000100,
1259 0x3c260, 0xffffffff, 0x00000100,
1260 0x3c27c, 0xffffffff, 0x00000100,
1261 0x3c278, 0xffffffff, 0x00000100,
1262 0x3c210, 0xffffffff, 0x06000100,
1263 0x3c290, 0xffffffff, 0x00000100,
1264 0x3c274, 0xffffffff, 0x00000100,
1265 0x3c2b4, 0xffffffff, 0x00000100,
1266 0x3c2b0, 0xffffffff, 0x00000100,
1267 0x3c270, 0xffffffff, 0x00000100,
1268 0x30800, 0xffffffff, 0xe0000000,
1269 0x3c020, 0xffffffff, 0x00010000,
1270 0x3c024, 0xffffffff, 0x00030002,
1271 0x3c028, 0xffffffff, 0x00040007,
1272 0x3c02c, 0xffffffff, 0x00060005,
1273 0x3c030, 0xffffffff, 0x00090008,
1274 0x3c034, 0xffffffff, 0x00010000,
1275 0x3c038, 0xffffffff, 0x00030002,
1276 0x3c03c, 0xffffffff, 0x00040007,
1277 0x3c040, 0xffffffff, 0x00060005,
1278 0x3c044, 0xffffffff, 0x00090008,
1279 0x3c000, 0xffffffff, 0x96e00200,
1280 0x8708, 0xffffffff, 0x00900100,
1281 0xc424, 0xffffffff, 0x0020003f,
1282 0x38, 0xffffffff, 0x0140001c,
1283 0x3c, 0x000f0000, 0x000f0000,
1284 0x220, 0xffffffff, 0xC060000C,
1285 0x224, 0xc0000fff, 0x00000100,
1286 0x20a8, 0xffffffff, 0x00000104,
1287 0x55e4, 0xff000fff, 0x00000100,
1288 0x30cc, 0xc0000fff, 0x00000104,
1289 0xc1e4, 0x00000001, 0x00000001,
1290 0xd00c, 0xff000ff0, 0x00000100,
1291 0xd80c, 0xff000ff0, 0x00000100
1294 static void cik_init_golden_registers(struct radeon_device *rdev)
1296 switch (rdev->family) {
1298 radeon_program_register_sequence(rdev,
1299 bonaire_mgcg_cgcg_init,
1300 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1301 radeon_program_register_sequence(rdev,
1302 bonaire_golden_registers,
1303 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1304 radeon_program_register_sequence(rdev,
1305 bonaire_golden_common_registers,
1306 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1307 radeon_program_register_sequence(rdev,
1308 bonaire_golden_spm_registers,
1309 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1312 radeon_program_register_sequence(rdev,
1313 kalindi_mgcg_cgcg_init,
1314 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1315 radeon_program_register_sequence(rdev,
1316 kalindi_golden_registers,
1317 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1318 radeon_program_register_sequence(rdev,
1319 kalindi_golden_common_registers,
1320 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1321 radeon_program_register_sequence(rdev,
1322 kalindi_golden_spm_registers,
1323 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1326 radeon_program_register_sequence(rdev,
1327 spectre_mgcg_cgcg_init,
1328 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1329 radeon_program_register_sequence(rdev,
1330 spectre_golden_registers,
1331 (const u32)ARRAY_SIZE(spectre_golden_registers));
1332 radeon_program_register_sequence(rdev,
1333 spectre_golden_common_registers,
1334 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1335 radeon_program_register_sequence(rdev,
1336 spectre_golden_spm_registers,
1337 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1345 * cik_get_xclk - get the xclk
1347 * @rdev: radeon_device pointer
1349 * Returns the reference clock used by the gfx engine
1352 u32 cik_get_xclk(struct radeon_device *rdev)
1354 u32 reference_clock = rdev->clock.spll.reference_freq;
1356 if (rdev->flags & RADEON_IS_IGP) {
1357 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1358 return reference_clock / 2;
1360 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1361 return reference_clock / 4;
1363 return reference_clock;
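/*
 * Example (hedged): with the 100 MHz reference clock commonly used on CIK
 * dGPUs, a board with XTALIN_DIVIDE set would report a 25 MHz xclk.
 */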
1367 * cik_mm_rdoorbell - read a doorbell dword
1369 * @rdev: radeon_device pointer
1370 * @offset: byte offset into the aperture
1372 * Returns the value in the doorbell aperture at the
1373 * requested offset (CIK).
1375 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1377 if (offset < rdev->doorbell.size) {
1378 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1380 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1386 * cik_mm_wdoorbell - write a doorbell dword
1388 * @rdev: radeon_device pointer
1389 * @offset: byte offset into the aperture
1390 * @v: value to write
1392 * Writes @v to the doorbell aperture at the
1393 * requested offset (CIK).
1395 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1397 if (offset < rdev->doorbell.size) {
1398 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1400 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1404 #define BONAIRE_IO_MC_REGS_SIZE 36
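/*
 * bonaire_io_mc_regs holds {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs that ci_mc_load_microcode() programs before uploading the MC
 * ucode itself.
 */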
1406 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1408 {0x00000070, 0x04400000},
1409 {0x00000071, 0x80c01803},
1410 {0x00000072, 0x00004004},
1411 {0x00000073, 0x00000100},
1412 {0x00000074, 0x00ff0000},
1413 {0x00000075, 0x34000000},
1414 {0x00000076, 0x08000014},
1415 {0x00000077, 0x00cc08ec},
1416 {0x00000078, 0x00000400},
1417 {0x00000079, 0x00000000},
1418 {0x0000007a, 0x04090000},
1419 {0x0000007c, 0x00000000},
1420 {0x0000007e, 0x4408a8e8},
1421 {0x0000007f, 0x00000304},
1422 {0x00000080, 0x00000000},
1423 {0x00000082, 0x00000001},
1424 {0x00000083, 0x00000002},
1425 {0x00000084, 0xf3e4f400},
1426 {0x00000085, 0x052024e3},
1427 {0x00000087, 0x00000000},
1428 {0x00000088, 0x01000000},
1429 {0x0000008a, 0x1c0a0000},
1430 {0x0000008b, 0xff010000},
1431 {0x0000008d, 0xffffefff},
1432 {0x0000008e, 0xfff3efff},
1433 {0x0000008f, 0xfff3efbf},
1434 {0x00000092, 0xf7ffffff},
1435 {0x00000093, 0xffffff7f},
1436 {0x00000095, 0x00101101},
1437 {0x00000096, 0x00000fff},
1438 {0x00000097, 0x00116fff},
1439 {0x00000098, 0x60010000},
1440 {0x00000099, 0x10010000},
1441 {0x0000009a, 0x00006000},
1442 {0x0000009b, 0x00001000},
1443 {0x0000009f, 0x00b48000}
1447 * cik_srbm_select - select specific register instances
1449 * @rdev: radeon_device pointer
1450 * @me: selected ME (micro engine)
1455  * Switches the currently active register instances. Some
1456 * registers are instanced per VMID, others are instanced per
1457 * me/pipe/queue combination.
1459 static void cik_srbm_select(struct radeon_device *rdev,
1460 u32 me, u32 pipe, u32 queue, u32 vmid)
1462 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1465 QUEUEID(queue & 0x7));
1466 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
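/*
 * Hedged usage sketch: callers bracket accesses to instanced registers
 * with a select/deselect pair, e.g.
 *
 *	cik_srbm_select(rdev, 0, 0, 0, vmid);
 *	... program the per-VMID registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *
 * so that the default (broadcast) instance is restored afterwards.
 */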
1471 * ci_mc_load_microcode - load MC ucode into the hw
1473 * @rdev: radeon_device pointer
1475 * Load the GDDR MC ucode into the hw (CIK).
1476 * Returns 0 on success, error on failure.
1478 static int ci_mc_load_microcode(struct radeon_device *rdev)
1480 const __be32 *fw_data;
1481 u32 running, blackout = 0;
1483 int i, ucode_size, regs_size;
1488 switch (rdev->family) {
1491 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1492 ucode_size = CIK_MC_UCODE_SIZE;
1493 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1497 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1501 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1502 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1505 /* reset the engine and set to writable */
1506 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1507 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1509 /* load mc io regs */
1510 for (i = 0; i < regs_size; i++) {
1511 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1512 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1514 /* load the MC ucode */
1515 fw_data = (const __be32 *)rdev->mc_fw->data;
1516 for (i = 0; i < ucode_size; i++)
1517 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1519 /* put the engine back into the active state */
1520 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1521 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1522 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1524 /* wait for training to complete */
1525 for (i = 0; i < rdev->usec_timeout; i++) {
1526 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1530 for (i = 0; i < rdev->usec_timeout; i++) {
1531 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1537 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1544 * cik_init_microcode - load ucode images from disk
1546 * @rdev: radeon_device pointer
1548 * Use the firmware interface to load the ucode images into
1549 * the driver (not loaded into hw).
1550 * Returns 0 on success, error on failure.
1552 static int cik_init_microcode(struct radeon_device *rdev)
1554 const char *chip_name;
1555 size_t pfp_req_size, me_req_size, ce_req_size,
1556 mec_req_size, rlc_req_size, mc_req_size,
1557 sdma_req_size, smc_req_size;
1563 switch (rdev->family) {
1565 chip_name = "BONAIRE";
1566 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1567 me_req_size = CIK_ME_UCODE_SIZE * 4;
1568 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1569 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1570 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1571 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1572 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1573 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1576 chip_name = "KAVERI";
1577 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578 me_req_size = CIK_ME_UCODE_SIZE * 4;
1579 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1582 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1585 chip_name = "KABINI";
1586 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1587 me_req_size = CIK_ME_UCODE_SIZE * 4;
1588 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1589 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1590 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1591 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1596 DRM_INFO("Loading %s Microcode\n", chip_name);
1598 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1599 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1602 if (rdev->pfp_fw->size != pfp_req_size) {
1604 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1605 rdev->pfp_fw->size, fw_name);
1610 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1611 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1614 if (rdev->me_fw->size != me_req_size) {
1616 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1617 rdev->me_fw->size, fw_name);
1621 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1622 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1625 if (rdev->ce_fw->size != ce_req_size) {
1627 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628 rdev->ce_fw->size, fw_name);
1632 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1633 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1636 if (rdev->mec_fw->size != mec_req_size) {
1638 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639 rdev->mec_fw->size, fw_name);
1643 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1644 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1647 if (rdev->rlc_fw->size != rlc_req_size) {
1649 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1650 rdev->rlc_fw->size, fw_name);
1654 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1655 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1658 if (rdev->sdma_fw->size != sdma_req_size) {
1660 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1661 rdev->sdma_fw->size, fw_name);
1665 /* No SMC, MC ucode on APUs */
1666 if (!(rdev->flags & RADEON_IS_IGP)) {
1667 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1668 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1671 if (rdev->mc_fw->size != mc_req_size) {
1673 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1674 rdev->mc_fw->size, fw_name);
1678 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1679 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1682 "smc: error loading firmware \"%s\"\n",
1684 release_firmware(rdev->smc_fw);
1685 rdev->smc_fw = NULL;
1686 } else if (rdev->smc_fw->size != smc_req_size) {
1688 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1689 rdev->smc_fw->size, fw_name);
1698 "cik_cp: Failed to load firmware \"%s\"\n",
1700 release_firmware(rdev->pfp_fw);
1701 rdev->pfp_fw = NULL;
1702 release_firmware(rdev->me_fw);
1704 release_firmware(rdev->ce_fw);
1706 release_firmware(rdev->rlc_fw);
1707 rdev->rlc_fw = NULL;
1708 release_firmware(rdev->mc_fw);
1710 release_firmware(rdev->smc_fw);
1711 rdev->smc_fw = NULL;
1720 * cik_tiling_mode_table_init - init the hw tiling table
1722 * @rdev: radeon_device pointer
1724 * Starting with SI, the tiling setup is done globally in a
1725 * set of 32 tiling modes. Rather than selecting each set of
1726 * parameters per surface as on older asics, we just select
1727 * which index in the tiling table we want to use, and the
1728 * surface uses those parameters (CIK).
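/*
 * The tile mode words built below are cached in
 * rdev->config.cik.tile_mode_array[] as well as written to the GB_TILE_MODE
 * registers, so userspace can query the table (hedged: via the RADEON_INFO
 * ioctl) rather than re-deriving the tiling parameters itself.
 */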
1730 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1732 const u32 num_tile_mode_states = 32;
1733 const u32 num_secondary_tile_mode_states = 16;
1734 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1735 u32 num_pipe_configs;
1736 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1737 rdev->config.cik.max_shader_engines;
1739 switch (rdev->config.cik.mem_row_size_in_kb) {
1741 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1745 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1748 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1752 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1753 if (num_pipe_configs > 8)
1754 num_pipe_configs = 8; /* ??? */
1756 if (num_pipe_configs == 8) {
1757 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1758 switch (reg_offset) {
1760 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1761 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1762 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1766 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1767 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1768 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1769 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1772 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1773 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1774 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1775 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1778 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1779 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1780 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1781 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1784 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1785 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1786 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1787 TILE_SPLIT(split_equal_to_row_size));
1790 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1791 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1794 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1795 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1796 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1797 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1800 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1801 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1802 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1803 TILE_SPLIT(split_equal_to_row_size));
1806 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1807 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1810 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1811 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1814 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1815 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1816 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1817 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1820 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1821 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1822 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1823 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1826 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1827 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1828 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1832 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1833 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1836 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1837 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1838 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1839 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1842 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1843 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1844 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1845 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1848 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1849 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1850 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1851 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1854 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1855 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1858 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1860 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1864 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1865 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1866 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1867 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1870 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1871 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1872 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1879 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1880 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1882 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1883 switch (reg_offset) {
1885 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1888 NUM_BANKS(ADDR_SURF_16_BANK));
1891 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1894 NUM_BANKS(ADDR_SURF_16_BANK));
1897 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1898 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1899 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1900 NUM_BANKS(ADDR_SURF_16_BANK));
1903 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1906 NUM_BANKS(ADDR_SURF_16_BANK));
1909 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1910 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1911 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1912 NUM_BANKS(ADDR_SURF_8_BANK));
1915 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1918 NUM_BANKS(ADDR_SURF_4_BANK));
1921 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1924 NUM_BANKS(ADDR_SURF_2_BANK));
1927 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1929 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1930 NUM_BANKS(ADDR_SURF_16_BANK));
1933 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1936 NUM_BANKS(ADDR_SURF_16_BANK));
1939 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1940 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1941 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1942 NUM_BANKS(ADDR_SURF_16_BANK));
1945 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1948 NUM_BANKS(ADDR_SURF_16_BANK));
1951 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1954 NUM_BANKS(ADDR_SURF_8_BANK));
1957 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1960 NUM_BANKS(ADDR_SURF_4_BANK));
1963 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1966 NUM_BANKS(ADDR_SURF_2_BANK));
1972 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1974 } else if (num_pipe_configs == 4) {
1976 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1977 switch (reg_offset) {
1979 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1981 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1982 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1985 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1986 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1987 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006 TILE_SPLIT(split_equal_to_row_size));
2009 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2013 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2015 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2016 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2019 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2022 TILE_SPLIT(split_equal_to_row_size));
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2026 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2029 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2030 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2033 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2035 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2036 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2039 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2040 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2041 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2045 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2051 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2055 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2057 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2058 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2061 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2063 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2077 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2078 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2079 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2085 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2089 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2098 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2099 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2101 } else if (num_rbs < 4) {
2102 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2103 switch (reg_offset) {
2105 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2106 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2107 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2111 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2113 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2114 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2117 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2123 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2129 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132 TILE_SPLIT(split_equal_to_row_size));
2135 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2139 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2141 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2142 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2145 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2146 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2147 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2148 TILE_SPLIT(split_equal_to_row_size));
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2152 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2155 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2156 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2159 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2161 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2162 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2165 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2166 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2171 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2181 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2188 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2194 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2200 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2203 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2204 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2205 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2210 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2215 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2224 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2225 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2228 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2229 switch (reg_offset) {
2231 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2234 NUM_BANKS(ADDR_SURF_16_BANK));
2237 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2240 NUM_BANKS(ADDR_SURF_16_BANK));
2243 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246 NUM_BANKS(ADDR_SURF_16_BANK));
2249 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2252 NUM_BANKS(ADDR_SURF_16_BANK));
2255 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 NUM_BANKS(ADDR_SURF_16_BANK));
2261 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 NUM_BANKS(ADDR_SURF_8_BANK));
2267 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2270 NUM_BANKS(ADDR_SURF_4_BANK));
2273 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276 NUM_BANKS(ADDR_SURF_16_BANK));
2279 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282 NUM_BANKS(ADDR_SURF_16_BANK));
2285 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 NUM_BANKS(ADDR_SURF_16_BANK));
2291 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 NUM_BANKS(ADDR_SURF_16_BANK));
2297 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2300 NUM_BANKS(ADDR_SURF_16_BANK));
2303 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306 NUM_BANKS(ADDR_SURF_8_BANK));
2309 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2312 NUM_BANKS(ADDR_SURF_4_BANK));
2318 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2320 } else if (num_pipe_configs == 2) {
2321 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2322 switch (reg_offset) {
2324 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2326 PIPE_CONFIG(ADDR_SURF_P2) |
2327 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2330 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2332 PIPE_CONFIG(ADDR_SURF_P2) |
2333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2336 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 PIPE_CONFIG(ADDR_SURF_P2) |
2339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 PIPE_CONFIG(ADDR_SURF_P2) |
2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350 PIPE_CONFIG(ADDR_SURF_P2) |
2351 TILE_SPLIT(split_equal_to_row_size));
2354 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2358 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2360 PIPE_CONFIG(ADDR_SURF_P2) |
2361 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2364 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2365 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2366 PIPE_CONFIG(ADDR_SURF_P2) |
2367 TILE_SPLIT(split_equal_to_row_size));
2370 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2373 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2377 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379 PIPE_CONFIG(ADDR_SURF_P2) |
2380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385 PIPE_CONFIG(ADDR_SURF_P2) |
2386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2389 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 PIPE_CONFIG(ADDR_SURF_P2) |
2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2399 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401 PIPE_CONFIG(ADDR_SURF_P2) |
2402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2407 PIPE_CONFIG(ADDR_SURF_P2) |
2408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413 PIPE_CONFIG(ADDR_SURF_P2) |
2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2421 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423 PIPE_CONFIG(ADDR_SURF_P2) |
2424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429 PIPE_CONFIG(ADDR_SURF_P2) |
2430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 PIPE_CONFIG(ADDR_SURF_P2) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2443 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2445 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2446 switch (reg_offset) {
2448 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2451 NUM_BANKS(ADDR_SURF_16_BANK));
2454 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2457 NUM_BANKS(ADDR_SURF_16_BANK));
2460 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463 NUM_BANKS(ADDR_SURF_16_BANK));
2466 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469 NUM_BANKS(ADDR_SURF_16_BANK));
2472 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475 NUM_BANKS(ADDR_SURF_16_BANK));
2478 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481 NUM_BANKS(ADDR_SURF_16_BANK));
2484 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487 NUM_BANKS(ADDR_SURF_8_BANK));
2490 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493 NUM_BANKS(ADDR_SURF_16_BANK));
2496 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2499 NUM_BANKS(ADDR_SURF_16_BANK));
2502 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505 NUM_BANKS(ADDR_SURF_16_BANK));
2508 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511 NUM_BANKS(ADDR_SURF_16_BANK));
2514 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517 NUM_BANKS(ADDR_SURF_16_BANK));
2520 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523 NUM_BANKS(ADDR_SURF_16_BANK));
2526 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 NUM_BANKS(ADDR_SURF_8_BANK));
2535 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2538 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2542 * cik_select_se_sh - select which SE, SH to address
2544 * @rdev: radeon_device pointer
2545 * @se_num: shader engine to address
2546 * @sh_num: sh block to address
2548 * Select which SE, SH combinations to address. Certain
2549 * registers are instanced per SE or SH. 0xffffffff means
2550 * broadcast to all SEs or SHs (CIK).
2552 static void cik_select_se_sh(struct radeon_device *rdev,
2553 u32 se_num, u32 sh_num)
2555 u32 data = INSTANCE_BROADCAST_WRITES;
2557 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2558 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2559 else if (se_num == 0xffffffff)
2560 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2561 else if (sh_num == 0xffffffff)
2562 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2564 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2565 WREG32(GRBM_GFX_INDEX, data);
2569 * cik_create_bitmask - create a bitmask
2571 * @bit_width: length of the mask
2573 * create a variable length bit mask (CIK).
2574 * Returns the bitmask.
2576 static u32 cik_create_bitmask(u32 bit_width)
2580 for (i = 0; i < bit_width; i++) {
2588 * cik_get_rb_disabled - compute the mask of disabled RBs
2590 * @rdev: radeon_device pointer
2591 * @max_rb_num: max RBs (render backends) for the asic
2592 * @se_num: number of SEs (shader engines) for the asic
2593 * @sh_per_se: number of SH blocks per SE for the asic
2595 * Calculates the bitmask of disabled RBs (CIK).
2596 * Returns the disabled RB bitmask.
2598 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2599 u32 max_rb_num, u32 se_num,
2604 data = RREG32(CC_RB_BACKEND_DISABLE);
2606 data &= BACKEND_DISABLE_MASK;
2609 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2611 data >>= BACKEND_DISABLE_SHIFT;
2613 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
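/* Added clarifying note (not in the original comments): the argument here is
 * the number of RBs covered by a single SH, so the generated mask picks out
 * just that SH's slice of the disable bits read from the registers above.
 */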
2619 * cik_setup_rb - setup the RBs on the asic
2621 * @rdev: radeon_device pointer
2622 * @se_num: number of SEs (shader engines) for the asic
2623 * @sh_per_se: number of SH blocks per SE for the asic
2624 * @max_rb_num: max RBs (render backends) for the asic
2626 * Configures per-SE/SH RB registers (CIK).
2628 static void cik_setup_rb(struct radeon_device *rdev,
2629 u32 se_num, u32 sh_per_se,
2634 u32 disabled_rbs = 0;
2635 u32 enabled_rbs = 0;
2637 for (i = 0; i < se_num; i++) {
2638 for (j = 0; j < sh_per_se; j++) {
2639 cik_select_se_sh(rdev, i, j);
2640 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2641 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2644 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2647 for (i = 0; i < max_rb_num; i++) {
2648 if (!(disabled_rbs & mask))
2649 enabled_rbs |= mask;
2653 for (i = 0; i < se_num; i++) {
2654 cik_select_se_sh(rdev, i, 0xffffffff);
2656 for (j = 0; j < sh_per_se; j++) {
2657 switch (enabled_rbs & 3) {
2659 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2662 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2666 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2671 WREG32(PA_SC_RASTER_CONFIG, data);
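/* Added clarifying note: each logical RB slot occupies two bits of
 * PA_SC_RASTER_CONFIG, and the switch above consumes enabled_rbs two bits
 * at a time to choose which RASTER_CONFIG_RB_MAP_n value backs that slot,
 * so disabled backends are simply never mapped.
 */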
2673 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2677 * cik_gpu_init - setup the 3D engine
2679 * @rdev: radeon_device pointer
2681 * Configures the 3D engine and tiling configuration
2682 * registers so that the 3D engine is usable.
2684 static void cik_gpu_init(struct radeon_device *rdev)
2686 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2687 u32 mc_shared_chmap, mc_arb_ramcfg;
2688 u32 hdp_host_path_cntl;
2692 switch (rdev->family) {
2694 rdev->config.cik.max_shader_engines = 2;
2695 rdev->config.cik.max_tile_pipes = 4;
2696 rdev->config.cik.max_cu_per_sh = 7;
2697 rdev->config.cik.max_sh_per_se = 1;
2698 rdev->config.cik.max_backends_per_se = 2;
2699 rdev->config.cik.max_texture_channel_caches = 4;
2700 rdev->config.cik.max_gprs = 256;
2701 rdev->config.cik.max_gs_threads = 32;
2702 rdev->config.cik.max_hw_contexts = 8;
2704 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2705 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2706 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2707 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2708 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2715 rdev->config.cik.max_shader_engines = 1;
2716 rdev->config.cik.max_tile_pipes = 2;
2717 rdev->config.cik.max_cu_per_sh = 2;
2718 rdev->config.cik.max_sh_per_se = 1;
2719 rdev->config.cik.max_backends_per_se = 1;
2720 rdev->config.cik.max_texture_channel_caches = 2;
2721 rdev->config.cik.max_gprs = 256;
2722 rdev->config.cik.max_gs_threads = 16;
2723 rdev->config.cik.max_hw_contexts = 8;
2725 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2726 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2727 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2728 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2729 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2733 /* Initialize HDP */
2734 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2735 WREG32((0x2c14 + j), 0x00000000);
2736 WREG32((0x2c18 + j), 0x00000000);
2737 WREG32((0x2c1c + j), 0x00000000);
2738 WREG32((0x2c20 + j), 0x00000000);
2739 WREG32((0x2c24 + j), 0x00000000);
2742 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2744 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2746 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2747 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2749 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2750 rdev->config.cik.mem_max_burst_length_bytes = 256;
2751 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2752 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2753 if (rdev->config.cik.mem_row_size_in_kb > 4)
2754 rdev->config.cik.mem_row_size_in_kb = 4;
2755 /* XXX use MC settings? */
2756 rdev->config.cik.shader_engine_tile_size = 32;
2757 rdev->config.cik.num_gpus = 1;
2758 rdev->config.cik.multi_gpu_tile_size = 64;
2760 /* fix up row size */
2761 gb_addr_config &= ~ROW_SIZE_MASK;
2762 switch (rdev->config.cik.mem_row_size_in_kb) {
2765 gb_addr_config |= ROW_SIZE(0);
2768 gb_addr_config |= ROW_SIZE(1);
2771 gb_addr_config |= ROW_SIZE(2);
2775 /* setup tiling info dword. gb_addr_config is not adequate since it does
2776 * not have bank info, so create a custom tiling dword.
2777 * bits 3:0 num_pipes
2778 * bits 7:4 num_banks
2779 * bits 11:8 group_size
2780 * bits 15:12 row_size
2782 rdev->config.cik.tile_config = 0;
2783 switch (rdev->config.cik.num_tile_pipes) {
2785 rdev->config.cik.tile_config |= (0 << 0);
2788 rdev->config.cik.tile_config |= (1 << 0);
2791 rdev->config.cik.tile_config |= (2 << 0);
2795 /* XXX what about 12? */
2796 rdev->config.cik.tile_config |= (3 << 0);
2799 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2800 rdev->config.cik.tile_config |= 1 << 4;
2802 rdev->config.cik.tile_config |= 0 << 4;
2803 rdev->config.cik.tile_config |=
2804 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2805 rdev->config.cik.tile_config |=
2806 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
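/* Added recap (illustrative, not from the original comments): the switch
 * above maps the pipe count into bits 3:0, a non-zero NOOFBANK field sets
 * bit 4, and the pipe-interleave and row-size fields are copied straight
 * out of gb_addr_config into bits 11:8 and 15:12 respectively, matching
 * the bit layout documented before the switch.
 */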
2808 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2809 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2810 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2811 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2812 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2813 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2814 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2815 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2817 cik_tiling_mode_table_init(rdev);
2819 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2820 rdev->config.cik.max_sh_per_se,
2821 rdev->config.cik.max_backends_per_se);
2823 /* set HW defaults for 3D engine */
2824 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2826 WREG32(SX_DEBUG_1, 0x20);
2828 WREG32(TA_CNTL_AUX, 0x00010000);
2830 tmp = RREG32(SPI_CONFIG_CNTL);
2832 WREG32(SPI_CONFIG_CNTL, tmp);
2834 WREG32(SQ_CONFIG, 1);
2836 WREG32(DB_DEBUG, 0);
2838 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2840 WREG32(DB_DEBUG2, tmp);
2842 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2844 WREG32(DB_DEBUG3, tmp);
2846 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2848 WREG32(CB_HW_CONTROL, tmp);
2850 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2852 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2853 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2854 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2855 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2857 WREG32(VGT_NUM_INSTANCES, 1);
2859 WREG32(CP_PERFMON_CNTL, 0);
2861 WREG32(SQ_CONFIG, 0);
2863 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2864 FORCE_EOV_MAX_REZ_CNT(255)));
2866 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2867 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2869 WREG32(VGT_GS_VERTEX_REUSE, 16);
2870 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2872 tmp = RREG32(HDP_MISC_CNTL);
2873 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2874 WREG32(HDP_MISC_CNTL, tmp);
2876 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2877 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2879 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2880 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2886 * GPU scratch registers helpers function.
2889 * cik_scratch_init - setup driver info for CP scratch regs
2891 * @rdev: radeon_device pointer
2893 * Set up the number and offset of the CP scratch registers.
2894 * NOTE: use of CP scratch registers is a legacy interface and
2895 * is not used by default on newer asics (r6xx+). On newer asics,
2896 * memory buffers are used for fences rather than scratch regs.
2898 static void cik_scratch_init(struct radeon_device *rdev)
2902 rdev->scratch.num_reg = 7;
2903 rdev->scratch.reg_base = SCRATCH_REG0;
2904 for (i = 0; i < rdev->scratch.num_reg; i++) {
2905 rdev->scratch.free[i] = true;
2906 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2911 * cik_ring_test - basic gfx ring test
2913 * @rdev: radeon_device pointer
2914 * @ring: radeon_ring structure holding ring information
2916 * Allocate a scratch register and write to it using the gfx ring (CIK).
2917 * Provides a basic gfx ring test to verify that the ring is working.
2918 * Used by cik_cp_gfx_resume().
2919 * Returns 0 on success, error on failure.
2921 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2928 r = radeon_scratch_get(rdev, &scratch);
2930 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2933 WREG32(scratch, 0xCAFEDEAD);
2934 r = radeon_ring_lock(rdev, ring, 3);
2936 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2937 radeon_scratch_free(rdev, scratch);
2940 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2941 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2942 radeon_ring_write(ring, 0xDEADBEEF);
2943 radeon_ring_unlock_commit(rdev, ring);
2945 for (i = 0; i < rdev->usec_timeout; i++) {
2946 tmp = RREG32(scratch);
2947 if (tmp == 0xDEADBEEF)
2951 if (i < rdev->usec_timeout) {
2952 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2954 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2955 ring->idx, scratch, tmp);
2958 radeon_scratch_free(rdev, scratch);
2963 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2965 * @rdev: radeon_device pointer
2966 * @fence: radeon fence object
2968 * Emits a fence sequence number on the gfx ring and flushes
2971 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2972 struct radeon_fence *fence)
2974 struct radeon_ring *ring = &rdev->ring[fence->ring];
2975 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2977 /* EVENT_WRITE_EOP - flush caches, send int */
2978 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2979 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2981 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2983 radeon_ring_write(ring, addr & 0xfffffffc);
2984 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2985 radeon_ring_write(ring, fence->seq);
2986 radeon_ring_write(ring, 0);
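/* Added note on the selector fields above (my reading, not from the original
 * comments): DATA_SEL(1) appears to request a 32-bit data write (the fence
 * sequence number) and INT_SEL(2) an interrupt once that write has landed;
 * the fence address is dword-aligned, hence the & 0xfffffffc on the low word.
 */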
2988 /* We should be using the new WAIT_REG_MEM special op packet here
2989 * but it causes the CP to hang
2991 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2992 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2993 WRITE_DATA_DST_SEL(0)));
2994 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2995 radeon_ring_write(ring, 0);
2996 radeon_ring_write(ring, 0);
3000 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3002 * @rdev: radeon_device pointer
3003 * @fence: radeon fence object
3005 * Emits a fence sequence number on the compute ring and flushes
3008 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3009 struct radeon_fence *fence)
3011 struct radeon_ring *ring = &rdev->ring[fence->ring];
3012 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3014 /* RELEASE_MEM - flush caches, send int */
3015 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3016 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3018 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3020 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3021 radeon_ring_write(ring, addr & 0xfffffffc);
3022 radeon_ring_write(ring, upper_32_bits(addr));
3023 radeon_ring_write(ring, fence->seq);
3024 radeon_ring_write(ring, 0);
3026 /* We should be using the new WAIT_REG_MEM special op packet here
3027 * but it causes the CP to hang
3029 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3030 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3031 WRITE_DATA_DST_SEL(0)));
3032 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3033 radeon_ring_write(ring, 0);
3034 radeon_ring_write(ring, 0);
3037 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3038 struct radeon_ring *ring,
3039 struct radeon_semaphore *semaphore,
3042 uint64_t addr = semaphore->gpu_addr;
3043 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3045 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3046 radeon_ring_write(ring, addr & 0xffffffff);
3047 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3054 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3056 * @rdev: radeon_device pointer
3057 * @ib: radeon indirect buffer object
3059 * Emits a DE (drawing engine) or CE (constant engine) IB
3060 * on the gfx ring. IBs are usually generated by userspace
3061 * acceleration drivers and submitted to the kernel for
3062 * scheduling on the ring. This function schedules the IB
3063 * on the gfx ring for execution by the GPU.
3065 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3067 struct radeon_ring *ring = &rdev->ring[ib->ring];
3068 u32 header, control = INDIRECT_BUFFER_VALID;
3070 if (ib->is_const_ib) {
3071 /* set switch buffer packet before const IB */
3072 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3073 radeon_ring_write(ring, 0);
3075 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3078 if (ring->rptr_save_reg) {
3079 next_rptr = ring->wptr + 3 + 4;
3080 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3081 radeon_ring_write(ring, ((ring->rptr_save_reg -
3082 PACKET3_SET_UCONFIG_REG_START) >> 2));
3083 radeon_ring_write(ring, next_rptr);
3084 } else if (rdev->wb.enabled) {
3085 next_rptr = ring->wptr + 5 + 4;
3086 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3087 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3088 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3089 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3090 radeon_ring_write(ring, next_rptr);
3093 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3096 control |= ib->length_dw |
3097 (ib->vm ? (ib->vm->id << 24) : 0);
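/* Added note (illustrative): the control dword carries the IB size in dwords
 * in its low bits and, for IBs running under a VM, the VMID shifted into the
 * high byte via the << 24 above; a NULL ib->vm leaves VMID 0 (kernel).
 */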
3099 radeon_ring_write(ring, header);
3100 radeon_ring_write(ring,
3104 (ib->gpu_addr & 0xFFFFFFFC));
3105 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3106 radeon_ring_write(ring, control);
3110 * cik_ib_test - basic gfx ring IB test
3112 * @rdev: radeon_device pointer
3113 * @ring: radeon_ring structure holding ring information
3115 * Allocate an IB and execute it on the gfx ring (CIK).
3116 * Provides a basic gfx ring test to verify that IBs are working.
3117 * Returns 0 on success, error on failure.
3119 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3121 struct radeon_ib ib;
3127 r = radeon_scratch_get(rdev, &scratch);
3129 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3132 WREG32(scratch, 0xCAFEDEAD);
3133 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3135 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3138 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3139 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3140 ib.ptr[2] = 0xDEADBEEF;
3142 r = radeon_ib_schedule(rdev, &ib, NULL);
3144 radeon_scratch_free(rdev, scratch);
3145 radeon_ib_free(rdev, &ib);
3146 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3149 r = radeon_fence_wait(ib.fence, false);
3151 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3154 for (i = 0; i < rdev->usec_timeout; i++) {
3155 tmp = RREG32(scratch);
3156 if (tmp == 0xDEADBEEF)
3160 if (i < rdev->usec_timeout) {
3161 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3163 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3167 radeon_scratch_free(rdev, scratch);
3168 radeon_ib_free(rdev, &ib);
3174 * On CIK, gfx and compute now have independent command processors.
3177 * Gfx consists of a single ring and can process both gfx jobs and
3178 * compute jobs. The gfx CP consists of three microengines (ME):
3179 * PFP - Pre-Fetch Parser
3181 * CE - Constant Engine
3182 * The PFP and ME make up what is considered the Drawing Engine (DE).
3183 * The CE is an asynchronous engine used for updating buffer descriptors
3184 * used by the DE so that they can be loaded into cache in parallel
3185 * while the DE is processing state update packets.
3188 * The compute CP consists of two microengines (ME):
3189 * MEC1 - Compute MicroEngine 1
3190 * MEC2 - Compute MicroEngine 2
3191 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3192 * The queues are exposed to userspace and are programmed directly
3193 * by the compute runtime.
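 * (Worked example, added for clarity: on Kaveri that works out to
 * 2 MECs x 4 pipes x 8 queues = 64 compute queues, alongside the single
 * gfx ring handled by the PFP/ME/CE.)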
3196 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3198 * @rdev: radeon_device pointer
3199 * @enable: enable or disable the MEs
3201 * Halts or unhalts the gfx MEs.
3203 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3206 WREG32(CP_ME_CNTL, 0);
3208 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3209 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3215 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3217 * @rdev: radeon_device pointer
3219 * Loads the gfx PFP, ME, and CE ucode.
3220 * Returns 0 for success, -EINVAL if the ucode is not available.
3222 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3224 const __be32 *fw_data;
3227 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3230 cik_cp_gfx_enable(rdev, false);
3233 fw_data = (const __be32 *)rdev->pfp_fw->data;
3234 WREG32(CP_PFP_UCODE_ADDR, 0);
3235 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3236 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3237 WREG32(CP_PFP_UCODE_ADDR, 0);
3240 fw_data = (const __be32 *)rdev->ce_fw->data;
3241 WREG32(CP_CE_UCODE_ADDR, 0);
3242 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3243 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3244 WREG32(CP_CE_UCODE_ADDR, 0);
3247 fw_data = (const __be32 *)rdev->me_fw->data;
3248 WREG32(CP_ME_RAM_WADDR, 0);
3249 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3250 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3251 WREG32(CP_ME_RAM_WADDR, 0);
3253 WREG32(CP_PFP_UCODE_ADDR, 0);
3254 WREG32(CP_CE_UCODE_ADDR, 0);
3255 WREG32(CP_ME_RAM_WADDR, 0);
3256 WREG32(CP_ME_RAM_RADDR, 0);
3261 * cik_cp_gfx_start - start the gfx ring
3263 * @rdev: radeon_device pointer
3265 * Enables the ring and loads the clear state context and other
3266 * packets required to init the ring.
3267 * Returns 0 for success, error for failure.
3269 static int cik_cp_gfx_start(struct radeon_device *rdev)
3271 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3275 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3276 WREG32(CP_ENDIAN_SWAP, 0);
3277 WREG32(CP_DEVICE_ID, 1);
3279 cik_cp_gfx_enable(rdev, true);
3281 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3283 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3287 /* init the CE partitions. CE only used for gfx on CIK */
3288 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3289 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3290 radeon_ring_write(ring, 0xc000);
3291 radeon_ring_write(ring, 0xc000);
3293 /* setup clear context state */
3294 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3295 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3297 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3298 radeon_ring_write(ring, 0x80000000);
3299 radeon_ring_write(ring, 0x80000000);
3301 for (i = 0; i < cik_default_size; i++)
3302 radeon_ring_write(ring, cik_default_state[i]);
3304 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3305 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3307 /* set clear context state */
3308 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3309 radeon_ring_write(ring, 0);
3311 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3312 radeon_ring_write(ring, 0x00000316);
3313 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3314 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3316 radeon_ring_unlock_commit(rdev, ring);
3322 * cik_cp_gfx_fini - stop the gfx ring
3324 * @rdev: radeon_device pointer
3326 * Stop the gfx ring and tear down the driver ring
3329 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3331 cik_cp_gfx_enable(rdev, false);
3332 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3336 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3338 * @rdev: radeon_device pointer
3340 * Program the location and size of the gfx ring buffer
3341 * and test it to make sure it's working.
3342 * Returns 0 for success, error for failure.
3344 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3346 struct radeon_ring *ring;
3352 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3353 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3355 /* Set the write pointer delay */
3356 WREG32(CP_RB_WPTR_DELAY, 0);
3358 /* set the RB to use vmid 0 */
3359 WREG32(CP_RB_VMID, 0);
3361 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3363 /* ring 0 - compute and gfx */
3364 /* Set ring buffer size */
3365 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3366 rb_bufsz = drm_order(ring->ring_size / 8);
3367 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3369 tmp |= BUF_SWAP_32BIT;
3371 WREG32(CP_RB0_CNTL, tmp);
3373 /* Initialize the ring buffer's read and write pointers */
3374 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3376 WREG32(CP_RB0_WPTR, ring->wptr);
3378 /* set the wb address whether it's enabled or not */
3379 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3380 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3382 /* scratch register shadowing is no longer supported */
3383 WREG32(SCRATCH_UMSK, 0);
3385 if (!rdev->wb.enabled)
3386 tmp |= RB_NO_UPDATE;
3389 WREG32(CP_RB0_CNTL, tmp);
3391 rb_addr = ring->gpu_addr >> 8;
3392 WREG32(CP_RB0_BASE, rb_addr);
3393 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3395 ring->rptr = RREG32(CP_RB0_RPTR);
3397 /* start the ring */
3398 cik_cp_gfx_start(rdev);
3399 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3400 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3402 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3408 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3409 struct radeon_ring *ring)
3415 if (rdev->wb.enabled) {
3416 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3418 mutex_lock(&rdev->srbm_mutex);
3419 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3420 rptr = RREG32(CP_HQD_PQ_RPTR);
3421 cik_srbm_select(rdev, 0, 0, 0, 0);
3422 mutex_unlock(&rdev->srbm_mutex);
3428 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3429 struct radeon_ring *ring)
3433 if (rdev->wb.enabled) {
3434 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3436 mutex_lock(&rdev->srbm_mutex);
3437 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3438 wptr = RREG32(CP_HQD_PQ_WPTR);
3439 cik_srbm_select(rdev, 0, 0, 0, 0);
3440 mutex_unlock(&rdev->srbm_mutex);
3446 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3447 struct radeon_ring *ring)
3449 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3450 WDOORBELL32(ring->doorbell_offset, ring->wptr);
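/* Added note: the new wptr is mirrored into the write-back slot and the
 * doorbell write then signals the CP to go fetch it, so no per-queue MMIO
 * register write is done from this path.
 */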
3454 * cik_cp_compute_enable - enable/disable the compute CP MEs
3456 * @rdev: radeon_device pointer
3457 * @enable: enable or disable the MEs
3459 * Halts or unhalts the compute MEs.
3461 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3464 WREG32(CP_MEC_CNTL, 0);
3466 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3471 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3473 * @rdev: radeon_device pointer
3475 * Loads the compute MEC1&2 ucode.
3476 * Returns 0 for success, -EINVAL if the ucode is not available.
3478 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3480 const __be32 *fw_data;
3486 cik_cp_compute_enable(rdev, false);
3489 fw_data = (const __be32 *)rdev->mec_fw->data;
3490 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3491 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3492 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3493 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3495 if (rdev->family == CHIP_KAVERI) {
3497 fw_data = (const __be32 *)rdev->mec_fw->data;
3498 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3499 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3500 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3501 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3508 * cik_cp_compute_start - start the compute queues
3510 * @rdev: radeon_device pointer
3512 * Enable the compute queues.
3513 * Returns 0 for success, error for failure.
3515 static int cik_cp_compute_start(struct radeon_device *rdev)
3517 cik_cp_compute_enable(rdev, true);
3523 * cik_cp_compute_fini - stop the compute queues
3525 * @rdev: radeon_device pointer
3527 * Stop the compute queues and tear down the driver queue
3530 static void cik_cp_compute_fini(struct radeon_device *rdev)
3534 cik_cp_compute_enable(rdev, false);
3536 for (i = 0; i < 2; i++) {
3538 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3540 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3542 if (rdev->ring[idx].mqd_obj) {
3543 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3544 if (unlikely(r != 0))
3545 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3547 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3548 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3550 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3551 rdev->ring[idx].mqd_obj = NULL;
3556 static void cik_mec_fini(struct radeon_device *rdev)
3560 if (rdev->mec.hpd_eop_obj) {
3561 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3562 if (unlikely(r != 0))
3563 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3564 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3565 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3567 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3568 rdev->mec.hpd_eop_obj = NULL;
3572 #define MEC_HPD_SIZE 2048
3574 static int cik_mec_init(struct radeon_device *rdev)
3580 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3581 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3583 if (rdev->family == CHIP_KAVERI)
3584 rdev->mec.num_mec = 2;
3586 rdev->mec.num_mec = 1;
3587 rdev->mec.num_pipe = 4;
3588 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3590 if (rdev->mec.hpd_eop_obj == NULL) {
3591 r = radeon_bo_create(rdev,
3592 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3594 RADEON_GEM_DOMAIN_GTT, NULL,
3595 &rdev->mec.hpd_eop_obj);
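/* Added sizing note: on Kaveri this allocates 2 MECs * 4 pipes *
 * MEC_HPD_SIZE (2 KB) * 2 = 32 KB of GTT for the per-pipe HPD EOP buffers;
 * Bonaire/Kabini need half that with a single MEC.
 */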
3597 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3602 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3603 if (unlikely(r != 0)) {
3607 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3608 &rdev->mec.hpd_eop_gpu_addr);
3610 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3614 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3616 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3621 /* clear memory. Not sure if this is required or not */
3622 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3624 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3625 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3630 struct hqd_registers
3632 u32 cp_mqd_base_addr;
3633 u32 cp_mqd_base_addr_hi;
3636 u32 cp_hqd_persistent_state;
3637 u32 cp_hqd_pipe_priority;
3638 u32 cp_hqd_queue_priority;
3641 u32 cp_hqd_pq_base_hi;
3643 u32 cp_hqd_pq_rptr_report_addr;
3644 u32 cp_hqd_pq_rptr_report_addr_hi;
3645 u32 cp_hqd_pq_wptr_poll_addr;
3646 u32 cp_hqd_pq_wptr_poll_addr_hi;
3647 u32 cp_hqd_pq_doorbell_control;
3649 u32 cp_hqd_pq_control;
3650 u32 cp_hqd_ib_base_addr;
3651 u32 cp_hqd_ib_base_addr_hi;
3653 u32 cp_hqd_ib_control;
3654 u32 cp_hqd_iq_timer;
3656 u32 cp_hqd_dequeue_request;
3657 u32 cp_hqd_dma_offload;
3658 u32 cp_hqd_sema_cmd;
3659 u32 cp_hqd_msg_type;
3660 u32 cp_hqd_atomic0_preop_lo;
3661 u32 cp_hqd_atomic0_preop_hi;
3662 u32 cp_hqd_atomic1_preop_lo;
3663 u32 cp_hqd_atomic1_preop_hi;
3664 u32 cp_hqd_hq_scheduler0;
3665 u32 cp_hqd_hq_scheduler1;
3672 u32 dispatch_initiator;
3676 u32 pipeline_stat_enable;
3677 u32 perf_counter_enable;
3683 u32 resource_limits;
3684 u32 static_thread_mgmt01[2];
3686 u32 static_thread_mgmt23[2];
3688 u32 thread_trace_enable;
3691 u32 vgtcs_invoke_count[2];
3692 struct hqd_registers queue_state;
3694 u32 interrupt_queue[64];
3698 * cik_cp_compute_resume - setup the compute queue registers
3700 * @rdev: radeon_device pointer
3702 * Program the compute queues and test them to make sure they are working.
3704 * Returns 0 for success, error for failure.
3706 static int cik_cp_compute_resume(struct radeon_device *rdev)
3710 bool use_doorbell = true;
3716 struct bonaire_mqd *mqd;
3718 r = cik_cp_compute_start(rdev);
3722 /* fix up chicken bits */
3723 tmp = RREG32(CP_CPF_DEBUG);
3725 WREG32(CP_CPF_DEBUG, tmp);
3727 /* init the pipes */
3728 mutex_lock(&rdev->srbm_mutex);
3729 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3730 int me = (i < 4) ? 1 : 2;
3731 int pipe = (i < 4) ? i : (i - 4);
3733 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3735 cik_srbm_select(rdev, me, pipe, 0, 0);
3737 /* write the EOP addr */
3738 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3739 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3741 /* set the VMID assigned */
3742 WREG32(CP_HPD_EOP_VMID, 0);
3744 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3745 tmp = RREG32(CP_HPD_EOP_CONTROL);
3746 tmp &= ~EOP_SIZE_MASK;
3747 tmp |= drm_order(MEC_HPD_SIZE / 8);
3748 WREG32(CP_HPD_EOP_CONTROL, tmp);
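/* Added worked example: MEC_HPD_SIZE is 2048 bytes = 512 dwords, so
 * drm_order(MEC_HPD_SIZE / 8) = 8 and the 2^(EOP_SIZE+1) formula noted
 * above gives exactly 512 dwords for the EOP fetcher.
 */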
3750 cik_srbm_select(rdev, 0, 0, 0, 0);
3751 mutex_unlock(&rdev->srbm_mutex);
3753 /* init the queues. Just two for now. */
3754 for (i = 0; i < 2; i++) {
3756 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3758 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3760 if (rdev->ring[idx].mqd_obj == NULL) {
3761 r = radeon_bo_create(rdev,
3762 sizeof(struct bonaire_mqd),
3764 RADEON_GEM_DOMAIN_GTT, NULL,
3765 &rdev->ring[idx].mqd_obj);
3767 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3772 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3773 if (unlikely(r != 0)) {
3774 cik_cp_compute_fini(rdev);
3777 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3780 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3781 cik_cp_compute_fini(rdev);
3784 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3786 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3787 cik_cp_compute_fini(rdev);
3791 /* doorbell offset */
3792 rdev->ring[idx].doorbell_offset =
3793 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
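/* Added note: doorbell_offset is a byte offset into the doorbell BAR; the
 * "+ 0" just selects the first doorbell in this ring's page, and the
 * DOORBELL_OFFSET field programmed further down takes it in dwords
 * (offset / 4).
 */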
3795 /* init the mqd struct */
3796 memset(buf, 0, sizeof(struct bonaire_mqd));
3798 mqd = (struct bonaire_mqd *)buf;
3799 mqd->header = 0xC0310800;
3800 mqd->static_thread_mgmt01[0] = 0xffffffff;
3801 mqd->static_thread_mgmt01[1] = 0xffffffff;
3802 mqd->static_thread_mgmt23[0] = 0xffffffff;
3803 mqd->static_thread_mgmt23[1] = 0xffffffff;
3805 mutex_lock(&rdev->srbm_mutex);
3806 cik_srbm_select(rdev, rdev->ring[idx].me,
3807 rdev->ring[idx].pipe,
3808 rdev->ring[idx].queue, 0);
3810 /* disable wptr polling */
3811 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3812 tmp &= ~WPTR_POLL_EN;
3813 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3815 /* enable doorbell? */
3816 mqd->queue_state.cp_hqd_pq_doorbell_control =
3817 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3819 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3821 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3822 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3823 mqd->queue_state.cp_hqd_pq_doorbell_control);
3825 /* disable the queue if it's active */
3826 mqd->queue_state.cp_hqd_dequeue_request = 0;
3827 mqd->queue_state.cp_hqd_pq_rptr = 0;
3828 mqd->queue_state.cp_hqd_pq_wptr = 0;
3829 if (RREG32(CP_HQD_ACTIVE) & 1) {
3830 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3831 for (i = 0; i < rdev->usec_timeout; i++) {
3832 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3836 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3837 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3838 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3841 /* set the pointer to the MQD */
3842 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3843 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3844 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3845 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3846 /* set MQD vmid to 0 */
3847 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3848 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3849 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3851 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3852 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3853 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3854 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3855 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3856 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3858 /* set up the HQD, this is similar to CP_RB0_CNTL */
3859 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3860 mqd->queue_state.cp_hqd_pq_control &=
3861 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3863 mqd->queue_state.cp_hqd_pq_control |=
3864 drm_order(rdev->ring[idx].ring_size / 8);
3865 mqd->queue_state.cp_hqd_pq_control |=
3866 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3868 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3870 mqd->queue_state.cp_hqd_pq_control &=
3871 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3872 mqd->queue_state.cp_hqd_pq_control |=
3873 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3874 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3876 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3878 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3880 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3881 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3882 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3883 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3884 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3885 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3887 /* set the wb address whether it's enabled or not */
3889 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3891 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3892 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3893 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3894 upper_32_bits(wb_gpu_addr) & 0xffff;
3895 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3896 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3897 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3898 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3900 /* enable the doorbell if requested */
3902 mqd->queue_state.cp_hqd_pq_doorbell_control =
3903 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3904 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3905 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3906 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3907 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3908 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3909 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3912 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3914 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3915 mqd->queue_state.cp_hqd_pq_doorbell_control);
3917 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3918 rdev->ring[idx].wptr = 0;
3919 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3920 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3921 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3922 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3924 /* set the vmid for the queue */
3925 mqd->queue_state.cp_hqd_vmid = 0;
3926 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3928 /* activate the queue */
3929 mqd->queue_state.cp_hqd_active = 1;
3930 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3932 cik_srbm_select(rdev, 0, 0, 0, 0);
3933 mutex_unlock(&rdev->srbm_mutex);
3935 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3936 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3938 rdev->ring[idx].ready = true;
3939 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3941 rdev->ring[idx].ready = false;
3947 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3949 cik_cp_gfx_enable(rdev, enable);
3950 cik_cp_compute_enable(rdev, enable);
3953 static int cik_cp_load_microcode(struct radeon_device *rdev)
3957 r = cik_cp_gfx_load_microcode(rdev);
3960 r = cik_cp_compute_load_microcode(rdev);
3967 static void cik_cp_fini(struct radeon_device *rdev)
3969 cik_cp_gfx_fini(rdev);
3970 cik_cp_compute_fini(rdev);
3973 static int cik_cp_resume(struct radeon_device *rdev)
3977 /* Reset all cp blocks */
3978 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3979 RREG32(GRBM_SOFT_RESET);
3981 WREG32(GRBM_SOFT_RESET, 0);
3982 RREG32(GRBM_SOFT_RESET);
3984 r = cik_cp_load_microcode(rdev);
3988 r = cik_cp_gfx_resume(rdev);
3991 r = cik_cp_compute_resume(rdev);
3998 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4000 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4001 RREG32(GRBM_STATUS));
4002 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4003 RREG32(GRBM_STATUS2));
4004 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4005 RREG32(GRBM_STATUS_SE0));
4006 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4007 RREG32(GRBM_STATUS_SE1));
4008 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4009 RREG32(GRBM_STATUS_SE2));
4010 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4011 RREG32(GRBM_STATUS_SE3));
4012 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4013 RREG32(SRBM_STATUS));
4014 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4015 RREG32(SRBM_STATUS2));
4016 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4017 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4018 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4019 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4020 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4021 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4022 RREG32(CP_STALLED_STAT1));
4023 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4024 RREG32(CP_STALLED_STAT2));
4025 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4026 RREG32(CP_STALLED_STAT3));
4027 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4028 RREG32(CP_CPF_BUSY_STAT));
4029 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4030 RREG32(CP_CPF_STALLED_STAT1));
4031 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4032 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4033 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4034 RREG32(CP_CPC_STALLED_STAT1));
4035 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4039 * cik_gpu_check_soft_reset - check which blocks are busy
4041 * @rdev: radeon_device pointer
4043 * Check which blocks are busy and return the relevant reset
4044 * mask to be used by cik_gpu_soft_reset().
4045 * Returns a mask of the blocks to be reset.
4047 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4053 tmp = RREG32(GRBM_STATUS);
4054 if (tmp & (PA_BUSY | SC_BUSY |
4055 BCI_BUSY | SX_BUSY |
4056 TA_BUSY | VGT_BUSY |
4058 GDS_BUSY | SPI_BUSY |
4059 IA_BUSY | IA_BUSY_NO_DMA))
4060 reset_mask |= RADEON_RESET_GFX;
4062 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4063 reset_mask |= RADEON_RESET_CP;
4066 tmp = RREG32(GRBM_STATUS2);
4068 reset_mask |= RADEON_RESET_RLC;
4070 /* SDMA0_STATUS_REG */
4071 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4072 if (!(tmp & SDMA_IDLE))
4073 reset_mask |= RADEON_RESET_DMA;
4075 /* SDMA1_STATUS_REG */
4076 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4077 if (!(tmp & SDMA_IDLE))
4078 reset_mask |= RADEON_RESET_DMA1;
4081 tmp = RREG32(SRBM_STATUS2);
4082 if (tmp & SDMA_BUSY)
4083 reset_mask |= RADEON_RESET_DMA;
4085 if (tmp & SDMA1_BUSY)
4086 reset_mask |= RADEON_RESET_DMA1;
4089 tmp = RREG32(SRBM_STATUS);
4092 reset_mask |= RADEON_RESET_IH;
4095 reset_mask |= RADEON_RESET_SEM;
4097 if (tmp & GRBM_RQ_PENDING)
4098 reset_mask |= RADEON_RESET_GRBM;
4101 reset_mask |= RADEON_RESET_VMC;
4103 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4104 MCC_BUSY | MCD_BUSY))
4105 reset_mask |= RADEON_RESET_MC;
4107 if (evergreen_is_display_hung(rdev))
4108 reset_mask |= RADEON_RESET_DISPLAY;
4110 /* Skip MC reset as it's most likely not hung, just busy */
4111 if (reset_mask & RADEON_RESET_MC) {
4112 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4113 reset_mask &= ~RADEON_RESET_MC;
4120 * cik_gpu_soft_reset - soft reset GPU
4122 * @rdev: radeon_device pointer
4123 * @reset_mask: mask of which blocks to reset
4125 * Soft reset the blocks specified in @reset_mask.
4127 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4129 struct evergreen_mc_save save;
4130 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4133 if (reset_mask == 0)
4136 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4138 cik_print_gpu_status_regs(rdev);
4139 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4140 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4141 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4142 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4147 /* Disable GFX parsing/prefetching */
4148 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4150 /* Disable MEC parsing/prefetching */
4151 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4153 if (reset_mask & RADEON_RESET_DMA) {
4155 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4157 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4159 if (reset_mask & RADEON_RESET_DMA1) {
4161 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4163 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4166 evergreen_mc_stop(rdev, &save);
4167 if (evergreen_mc_wait_for_idle(rdev)) {
4168 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4171 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4172 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4174 if (reset_mask & RADEON_RESET_CP) {
4175 grbm_soft_reset |= SOFT_RESET_CP;
4177 srbm_soft_reset |= SOFT_RESET_GRBM;
4180 if (reset_mask & RADEON_RESET_DMA)
4181 srbm_soft_reset |= SOFT_RESET_SDMA;
4183 if (reset_mask & RADEON_RESET_DMA1)
4184 srbm_soft_reset |= SOFT_RESET_SDMA1;
4186 if (reset_mask & RADEON_RESET_DISPLAY)
4187 srbm_soft_reset |= SOFT_RESET_DC;
4189 if (reset_mask & RADEON_RESET_RLC)
4190 grbm_soft_reset |= SOFT_RESET_RLC;
4192 if (reset_mask & RADEON_RESET_SEM)
4193 srbm_soft_reset |= SOFT_RESET_SEM;
4195 if (reset_mask & RADEON_RESET_IH)
4196 srbm_soft_reset |= SOFT_RESET_IH;
4198 if (reset_mask & RADEON_RESET_GRBM)
4199 srbm_soft_reset |= SOFT_RESET_GRBM;
4201 if (reset_mask & RADEON_RESET_VMC)
4202 srbm_soft_reset |= SOFT_RESET_VMC;
4204 if (!(rdev->flags & RADEON_IS_IGP)) {
4205 if (reset_mask & RADEON_RESET_MC)
4206 srbm_soft_reset |= SOFT_RESET_MC;
4209 if (grbm_soft_reset) {
4210 tmp = RREG32(GRBM_SOFT_RESET);
4211 tmp |= grbm_soft_reset;
4212 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4213 WREG32(GRBM_SOFT_RESET, tmp);
4214 tmp = RREG32(GRBM_SOFT_RESET);
4218 tmp &= ~grbm_soft_reset;
4219 WREG32(GRBM_SOFT_RESET, tmp);
4220 tmp = RREG32(GRBM_SOFT_RESET);
4223 if (srbm_soft_reset) {
4224 tmp = RREG32(SRBM_SOFT_RESET);
4225 tmp |= srbm_soft_reset;
4226 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4227 WREG32(SRBM_SOFT_RESET, tmp);
4228 tmp = RREG32(SRBM_SOFT_RESET);
4232 tmp &= ~srbm_soft_reset;
4233 WREG32(SRBM_SOFT_RESET, tmp);
4234 tmp = RREG32(SRBM_SOFT_RESET);
4237 /* Wait a little for things to settle down */
4240 evergreen_mc_resume(rdev, &save);
4243 cik_print_gpu_status_regs(rdev);
4247 * cik_asic_reset - soft reset GPU
4249 * @rdev: radeon_device pointer
4251 * Look up which blocks are hung and attempt to reset them.
4253 * Returns 0 for success.
4255 int cik_asic_reset(struct radeon_device *rdev)
4259 reset_mask = cik_gpu_check_soft_reset(rdev);
4262 r600_set_bios_scratch_engine_hung(rdev, true);
4264 cik_gpu_soft_reset(rdev, reset_mask);
4266 reset_mask = cik_gpu_check_soft_reset(rdev);
4269 r600_set_bios_scratch_engine_hung(rdev, false);
4275 * cik_gfx_is_lockup - check if the 3D engine is locked up
4277 * @rdev: radeon_device pointer
4278 * @ring: radeon_ring structure holding ring information
4280 * Check if the 3D engine is locked up (CIK).
4281 * Returns true if the engine is locked, false if not.
4283 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4285 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4287 if (!(reset_mask & (RADEON_RESET_GFX |
4288 RADEON_RESET_COMPUTE |
4289 RADEON_RESET_CP))) {
4290 radeon_ring_lockup_update(ring);
4293 /* force CP activities */
4294 radeon_ring_force_activity(rdev, ring);
4295 return radeon_ring_test_lockup(rdev, ring);
4300 * cik_mc_program - program the GPU memory controller
4302 * @rdev: radeon_device pointer
4304 * Set the location of vram, gart, and AGP in the GPU's
4305 * physical address space (CIK).
4307 static void cik_mc_program(struct radeon_device *rdev)
4309 struct evergreen_mc_save save;
4313 /* Initialize HDP */
4314 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4315 WREG32((0x2c14 + j), 0x00000000);
4316 WREG32((0x2c18 + j), 0x00000000);
4317 WREG32((0x2c1c + j), 0x00000000);
4318 WREG32((0x2c20 + j), 0x00000000);
4319 WREG32((0x2c24 + j), 0x00000000);
4321 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4323 evergreen_mc_stop(rdev, &save);
4324 if (radeon_mc_wait_for_idle(rdev)) {
4325 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4327 /* Lockout access through VGA aperture */
4328 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4329 /* Update configuration */
4330 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4331 rdev->mc.vram_start >> 12);
4332 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4333 rdev->mc.vram_end >> 12);
4334 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4335 rdev->vram_scratch.gpu_addr >> 12);
4336 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4337 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4338 WREG32(MC_VM_FB_LOCATION, tmp);
4339 /* XXX double check these! */
4340 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4341 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4342 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4343 WREG32(MC_VM_AGP_BASE, 0);
4344 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4345 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4346 if (radeon_mc_wait_for_idle(rdev)) {
4347 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4349 evergreen_mc_resume(rdev, &save);
4350 /* we need to own VRAM, so turn off the VGA renderer here
4351 * to stop it from overwriting our objects */
4352 rv515_vga_render_disable(rdev);
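
/*
 * Illustrative sketch (not used by the driver): how the MC_VM_FB_LOCATION
 * value programmed above is packed.  The register holds the framebuffer
 * aperture as two 16-bit fields, each taken from bits 39:24 of the byte
 * address, i.e. the address in 16MB units.  The helper name is made up
 * purely for illustration.
 */
static inline u32 cik_example_pack_fb_location(u64 vram_start, u64 vram_end)
{
	u32 tmp;

	tmp = ((vram_end >> 24) & 0xFFFF) << 16;	/* top of FB in 16MB units */
	tmp |= (vram_start >> 24) & 0xFFFF;		/* base of FB in 16MB units */
	return tmp;
}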
4356 * cik_mc_init - initialize the memory controller driver params
4358 * @rdev: radeon_device pointer
4360 * Look up the amount of vram, vram width, and decide how to place
4361 * vram and gart within the GPU's physical address space (CIK).
4362 * Returns 0 for success.
4364 static int cik_mc_init(struct radeon_device *rdev)
4367 int chansize, numchan;
4369 /* Get VRAM information */
4370 rdev->mc.vram_is_ddr = true;
4371 tmp = RREG32(MC_ARB_RAMCFG);
4372 if (tmp & CHANSIZE_MASK) {
4377 tmp = RREG32(MC_SHARED_CHMAP);
4378 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4408 rdev->mc.vram_width = numchan * chansize;
4409 /* Could aper size report 0? */
4410 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4411 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4412 /* size in MB on CIK */
4413 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4414 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4415 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4416 si_vram_gtt_location(rdev, &rdev->mc);
4417 radeon_update_bandwidth_info(rdev);
4424 * VMID 0 is the physical GPU addresses as used by the kernel.
4425 * VMIDs 1-15 are used for userspace clients and are handled
4426 * by the radeon vm/hsa code.
4429 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4431 * @rdev: radeon_device pointer
4433 * Flush the TLB for the VMID 0 page table (CIK).
4435 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4437 /* flush hdp cache */
4438 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4440 /* bits 0-15 are the VM contexts0-15 */
4441 WREG32(VM_INVALIDATE_REQUEST, 0x1);
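
/*
 * Illustrative sketch (not used by the driver): VM_INVALIDATE_REQUEST is a
 * bitmask with one bit per VM context, so flushing a single VMID means
 * writing (1 << vmid); the write of 0x1 above flushes only context 0.
 * The helper name is made up purely for illustration.
 */
static inline u32 cik_example_vm_invalidate_mask(unsigned vmid)
{
	return 1u << vmid;	/* bits 0-15 select VM contexts 0-15 */
}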
4445 * cik_pcie_gart_enable - gart enable
4447 * @rdev: radeon_device pointer
4449 * This sets up the TLBs, programs the page tables for VMID0,
4450 * sets up the hw for VMIDs 1-15 which are allocated on
4451 * demand, and sets up the global locations for the LDS, GDS,
4452 * and GPUVM for FSA64 clients (CIK).
4453 * Returns 0 for success, errors for failure.
4455 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4459 if (rdev->gart.robj == NULL) {
4460 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4463 r = radeon_gart_table_vram_pin(rdev);
4466 radeon_gart_restore(rdev);
4467 /* Setup TLB control */
4468 WREG32(MC_VM_MX_L1_TLB_CNTL,
4471 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4472 ENABLE_ADVANCED_DRIVER_MODEL |
4473 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4474 /* Setup L2 cache */
4475 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4476 ENABLE_L2_FRAGMENT_PROCESSING |
4477 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4478 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4479 EFFECTIVE_L2_QUEUE_SIZE(7) |
4480 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4481 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4482 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4483 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4484 /* setup context0 */
4485 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4486 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4487 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4488 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4489 (u32)(rdev->dummy_page.addr >> 12));
4490 WREG32(VM_CONTEXT0_CNTL2, 0);
4491 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4492 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4498 /* empty context1-15 */
4499 /* FIXME start with 4G, once using 2 level pt switch to full
4502 /* set vm size, must be a multiple of 4 */
4503 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4504 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4505 for (i = 1; i < 16; i++) {
4507 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4508 rdev->gart.table_addr >> 12);
4510 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4511 rdev->gart.table_addr >> 12);
4514 /* enable context1-15 */
4515 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4516 (u32)(rdev->dummy_page.addr >> 12));
4517 WREG32(VM_CONTEXT1_CNTL2, 4);
4518 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4519 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4520 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4521 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4522 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4523 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4524 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4525 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4526 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4527 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4528 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4529 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4530 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4532 /* TC cache setup ??? */
4533 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4534 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4535 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4537 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4538 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4539 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4540 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4541 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4543 WREG32(TC_CFG_L1_VOLATILE, 0);
4544 WREG32(TC_CFG_L2_VOLATILE, 0);
4546 if (rdev->family == CHIP_KAVERI) {
4547 u32 tmp = RREG32(CHUB_CONTROL);
4549 WREG32(CHUB_CONTROL, tmp);
4552 /* XXX SH_MEM regs */
4553 /* where to put LDS, scratch, GPUVM in FSA64 space */
4554 mutex_lock(&rdev->srbm_mutex);
4555 for (i = 0; i < 16; i++) {
4556 cik_srbm_select(rdev, 0, 0, 0, i);
4557 /* CP and shaders */
4558 WREG32(SH_MEM_CONFIG, 0);
4559 WREG32(SH_MEM_APE1_BASE, 1);
4560 WREG32(SH_MEM_APE1_LIMIT, 0);
4561 WREG32(SH_MEM_BASES, 0);
4563 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4564 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4565 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4566 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4567 /* XXX SDMA RLC - todo */
4569 cik_srbm_select(rdev, 0, 0, 0, 0);
4570 mutex_unlock(&rdev->srbm_mutex);
4572 cik_pcie_gart_tlb_flush(rdev);
4573 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4574 (unsigned)(rdev->mc.gtt_size >> 20),
4575 (unsigned long long)rdev->gart.table_addr);
4576 rdev->gart.ready = true;
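
/*
 * Illustrative sketch (an assumption, not driver code): with VMID 0 set up
 * above as a flat, depth-0 page table covering the GTT aperture, a GPU
 * virtual address inside [gtt_start, gtt_end] is translated by a single
 * 64-bit PTE.  The helper below only restates the intended 4KB-page
 * indexing; its name is made up for illustration.
 */
static inline u64 cik_example_gtt_pte_index(u64 gpu_addr, u64 gtt_start)
{
	return (gpu_addr - gtt_start) >> 12;	/* one PTE per 4KB GPU page */
}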
4581 * cik_pcie_gart_disable - gart disable
4583 * @rdev: radeon_device pointer
4585 * This disables all VM page tables (CIK).
4587 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4589 /* Disable all tables */
4590 WREG32(VM_CONTEXT0_CNTL, 0);
4591 WREG32(VM_CONTEXT1_CNTL, 0);
4592 /* Setup TLB control */
4593 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4594 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4595 /* Setup L2 cache */
4597 ENABLE_L2_FRAGMENT_PROCESSING |
4598 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4599 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4600 EFFECTIVE_L2_QUEUE_SIZE(7) |
4601 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4602 WREG32(VM_L2_CNTL2, 0);
4603 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4604 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4605 radeon_gart_table_vram_unpin(rdev);
4609 * cik_pcie_gart_fini - vm fini callback
4611 * @rdev: radeon_device pointer
4613 * Tears down the driver GART/VM setup (CIK).
4615 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4617 cik_pcie_gart_disable(rdev);
4618 radeon_gart_table_vram_free(rdev);
4619 radeon_gart_fini(rdev);
4624 * cik_ib_parse - vm ib_parse callback
4626 * @rdev: radeon_device pointer
4627 * @ib: indirect buffer pointer
4629 * CIK uses hw IB checking so this is a nop (CIK).
4631 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4638 * VMID 0 is the physical GPU addresses as used by the kernel.
4639 * VMIDs 1-15 are used for userspace clients and are handled
4640 * by the radeon vm/hsa code.
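
/*
 * Illustrative sketch (not used by the driver): the per-VMID page table base
 * registers are split into two banks, contexts 0-7 and contexts 8-15, which
 * is why cik_vm_flush() below (and cik_pcie_gart_enable() above) indexes
 * either VM_CONTEXT0_PAGE_TABLE_BASE_ADDR or VM_CONTEXT8_PAGE_TABLE_BASE_ADDR.
 * The helper name is made up purely for illustration.
 */
static inline u32 cik_example_vm_pt_base_reg(unsigned vmid)
{
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}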
4643 * cik_vm_init - cik vm init callback
4645 * @rdev: radeon_device pointer
4647 * Inits cik specific vm parameters (number of VMs, base of vram for
4648 * VMIDs 1-15) (CIK).
4649 * Returns 0 for success.
4651 int cik_vm_init(struct radeon_device *rdev)
4654 rdev->vm_manager.nvm = 16;
4655 /* base offset of vram pages */
4656 if (rdev->flags & RADEON_IS_IGP) {
4657 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4659 rdev->vm_manager.vram_base_offset = tmp;
4661 rdev->vm_manager.vram_base_offset = 0;
4667 * cik_vm_fini - cik vm fini callback
4669 * @rdev: radeon_device pointer
4671 * Tear down any asic specific VM setup (CIK).
4673 void cik_vm_fini(struct radeon_device *rdev)
4678 * cik_vm_decode_fault - print human readable fault info
4680 * @rdev: radeon_device pointer
4681 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4682 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4684 * Print human readable fault information (CIK).
4686 static void cik_vm_decode_fault(struct radeon_device *rdev,
4687 u32 status, u32 addr, u32 mc_client)
4689 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4690 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4691 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4692 char *block = (char *)&mc_client;
4694 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4695 protections, vmid, addr,
4696 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4701 * cik_vm_flush - cik vm flush using the CP
4703 * @rdev: radeon_device pointer
4705 * Update the page table base and flush the VM TLB
4706 * using the CP (CIK).
4708 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4710 struct radeon_ring *ring = &rdev->ring[ridx];
4715 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4716 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4717 WRITE_DATA_DST_SEL(0)));
4719 radeon_ring_write(ring,
4720 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4722 radeon_ring_write(ring,
4723 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4725 radeon_ring_write(ring, 0);
4726 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4728 /* update SH_MEM_* regs */
4729 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4730 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4731 WRITE_DATA_DST_SEL(0)));
4732 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4733 radeon_ring_write(ring, 0);
4734 radeon_ring_write(ring, VMID(vm->id));
4736 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4737 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4738 WRITE_DATA_DST_SEL(0)));
4739 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4740 radeon_ring_write(ring, 0);
4742 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4743 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4744 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4745 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4747 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4748 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4749 WRITE_DATA_DST_SEL(0)));
4750 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4751 radeon_ring_write(ring, 0);
4752 radeon_ring_write(ring, VMID(0));
4755 /* We should be using the WAIT_REG_MEM packet here like in
4756 * cik_fence_ring_emit(), but it causes the CP to hang in this
4759 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4760 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4761 WRITE_DATA_DST_SEL(0)));
4762 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4763 radeon_ring_write(ring, 0);
4764 radeon_ring_write(ring, 0);
4766 /* bits 0-15 are the VM contexts0-15 */
4767 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4768 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4769 WRITE_DATA_DST_SEL(0)));
4770 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4771 radeon_ring_write(ring, 0);
4772 radeon_ring_write(ring, 1 << vm->id);
4774 /* compute doesn't have PFP */
4775 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4776 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4777 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4778 radeon_ring_write(ring, 0x0);
4783 * cik_vm_set_page - update the page tables using sDMA
4785 * @rdev: radeon_device pointer
4786 * @ib: indirect buffer to fill with commands
4787 * @pe: addr of the page entry
4788 * @addr: dst addr to write into pe
4789 * @count: number of page entries to update
4790 * @incr: increase next addr by incr bytes
4791 * @flags: access flags
4793 * Update the page tables using CP or sDMA (CIK).
4795 void cik_vm_set_page(struct radeon_device *rdev,
4796 struct radeon_ib *ib,
4798 uint64_t addr, unsigned count,
4799 uint32_t incr, uint32_t flags)
4801 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4805 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4808 ndw = 2 + count * 2;
4812 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4813 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4814 WRITE_DATA_DST_SEL(1));
4815 ib->ptr[ib->length_dw++] = pe;
4816 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4817 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4818 if (flags & RADEON_VM_PAGE_SYSTEM) {
4819 value = radeon_vm_map_gart(rdev, addr);
4820 value &= 0xFFFFFFFFFFFFF000ULL;
4821 } else if (flags & RADEON_VM_PAGE_VALID) {
4827 value |= r600_flags;
4828 ib->ptr[ib->length_dw++] = value;
4829 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4834 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
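
/*
 * Illustrative sketch (an assumption, not driver code): in the CP path of
 * cik_vm_set_page() above, each page table entry written via
 * PACKET3_WRITE_DATA is a 64-bit value combining the 4KB-aligned page
 * address with the translated access flags, and the source address advances
 * by @incr per entry.  The helper name is made up for illustration.
 */
static inline u64 cik_example_build_pte(u64 page_addr, u32 hw_flags)
{
	return (page_addr & 0xFFFFFFFFFFFFF000ULL) | hw_flags;
}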
4840 * The RLC is a multi-purpose microengine that handles a
4841 * variety of functions, the most important of which is
4842 * the interrupt controller.
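
/*
 * Illustrative sketch (not used by the driver): the clock/power gating
 * helpers below follow a common read-modify-write pattern - snapshot the
 * register, toggle the relevant bits, and only write it back when the value
 * actually changed, avoiding redundant register traffic.  The helper name
 * is made up purely for illustration.
 */
static inline void cik_example_rmw_enable(struct radeon_device *rdev,
					  u32 reg, u32 bits, bool enable)
{
	u32 orig, data;

	orig = data = RREG32(reg);
	if (enable)
		data |= bits;
	else
		data &= ~bits;
	if (orig != data)
		WREG32(reg, data);
}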
4844 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4847 u32 tmp = RREG32(CP_INT_CNTL_RING0);
4850 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4852 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4853 WREG32(CP_INT_CNTL_RING0, tmp);
4856 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4860 tmp = RREG32(RLC_LB_CNTL);
4862 tmp |= LOAD_BALANCE_ENABLE;
4864 tmp &= ~LOAD_BALANCE_ENABLE;
4865 WREG32(RLC_LB_CNTL, tmp);
4868 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4873 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4874 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4875 cik_select_se_sh(rdev, i, j);
4876 for (k = 0; k < rdev->usec_timeout; k++) {
4877 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4883 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4885 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4886 for (k = 0; k < rdev->usec_timeout; k++) {
4887 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4893 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4897 tmp = RREG32(RLC_CNTL);
4899 WREG32(RLC_CNTL, rlc);
4902 static u32 cik_halt_rlc(struct radeon_device *rdev)
4906 orig = data = RREG32(RLC_CNTL);
4908 if (data & RLC_ENABLE) {
4911 data &= ~RLC_ENABLE;
4912 WREG32(RLC_CNTL, data);
4914 for (i = 0; i < rdev->usec_timeout; i++) {
4915 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4920 cik_wait_for_rlc_serdes(rdev);
4926 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4930 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4931 WREG32(RLC_GPR_REG2, tmp);
4933 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4934 for (i = 0; i < rdev->usec_timeout; i++) {
4935 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4940 for (i = 0; i < rdev->usec_timeout; i++) {
4941 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
4947 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4951 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4952 WREG32(RLC_GPR_REG2, tmp);
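
/*
 * Illustrative sketch (not used by the driver): RLC safe mode entry/exit
 * above is a request/acknowledge handshake - post a request message and then
 * poll, bounded by the usual usec timeout, until the hardware clears the
 * request bit.  The helper below only restates that polling shape; its name
 * is made up for illustration.
 */
static inline bool cik_example_poll_req_cleared(struct radeon_device *rdev,
						u32 reg, u32 req_bit)
{
	u32 i;

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(reg) & req_bit) == 0)
			return true;	/* hardware acknowledged the request */
		udelay(1);
	}
	return false;		/* timed out waiting for the ack */
}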
4956 * cik_rlc_stop - stop the RLC ME
4958 * @rdev: radeon_device pointer
4960 * Halt the RLC ME (MicroEngine) (CIK).
4962 static void cik_rlc_stop(struct radeon_device *rdev)
4964 WREG32(RLC_CNTL, 0);
4966 cik_enable_gui_idle_interrupt(rdev, false);
4968 cik_wait_for_rlc_serdes(rdev);
4972 * cik_rlc_start - start the RLC ME
4974 * @rdev: radeon_device pointer
4976 * Unhalt the RLC ME (MicroEngine) (CIK).
4978 static void cik_rlc_start(struct radeon_device *rdev)
4980 WREG32(RLC_CNTL, RLC_ENABLE);
4982 cik_enable_gui_idle_interrupt(rdev, true);
4988 * cik_rlc_resume - setup the RLC hw
4990 * @rdev: radeon_device pointer
4992 * Initialize the RLC registers, load the ucode,
4993 * and start the RLC (CIK).
4994 * Returns 0 for success, -EINVAL if the ucode is not available.
4996 static int cik_rlc_resume(struct radeon_device *rdev)
4999 const __be32 *fw_data;
5004 switch (rdev->family) {
5007 size = BONAIRE_RLC_UCODE_SIZE;
5010 size = KV_RLC_UCODE_SIZE;
5013 size = KB_RLC_UCODE_SIZE;
5020 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5021 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5029 WREG32(RLC_LB_CNTR_INIT, 0);
5030 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5032 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5033 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5034 WREG32(RLC_LB_PARAMS, 0x00600408);
5035 WREG32(RLC_LB_CNTL, 0x80000004);
5037 WREG32(RLC_MC_CNTL, 0);
5038 WREG32(RLC_UCODE_CNTL, 0);
5040 fw_data = (const __be32 *)rdev->rlc_fw->data;
5041 WREG32(RLC_GPM_UCODE_ADDR, 0);
5042 for (i = 0; i < size; i++)
5043 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5044 WREG32(RLC_GPM_UCODE_ADDR, 0);
5046 /* XXX - find out what chips support lbpw */
5047 cik_enable_lbpw(rdev, false);
5049 if (rdev->family == CHIP_BONAIRE)
5050 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5052 cik_rlc_start(rdev);
5057 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5059 u32 data, orig, tmp, tmp2;
5061 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5063 cik_enable_gui_idle_interrupt(rdev, enable);
5066 tmp = cik_halt_rlc(rdev);
5068 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5069 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5070 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5071 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5072 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5074 cik_update_rlc(rdev, tmp);
5076 data |= CGCG_EN | CGLS_EN;
5078 RREG32(CB_CGTT_SCLK_CTRL);
5079 RREG32(CB_CGTT_SCLK_CTRL);
5080 RREG32(CB_CGTT_SCLK_CTRL);
5081 RREG32(CB_CGTT_SCLK_CTRL);
5083 data &= ~(CGCG_EN | CGLS_EN);
5087 WREG32(RLC_CGCG_CGLS_CTRL, data);
5091 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5093 u32 data, orig, tmp = 0;
5096 orig = data = RREG32(CP_MEM_SLP_CNTL);
5097 data |= CP_MEM_LS_EN;
5099 WREG32(CP_MEM_SLP_CNTL, data);
5101 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5104 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5106 tmp = cik_halt_rlc(rdev);
5108 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5109 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5110 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5111 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5112 WREG32(RLC_SERDES_WR_CTRL, data);
5114 cik_update_rlc(rdev, tmp);
5116 orig = data = RREG32(CGTS_SM_CTRL_REG);
5117 data &= ~SM_MODE_MASK;
5118 data |= SM_MODE(0x2);
5119 data |= SM_MODE_ENABLE;
5120 data &= ~CGTS_OVERRIDE;
5121 data &= ~CGTS_LS_OVERRIDE;
5122 data &= ~ON_MONITOR_ADD_MASK;
5123 data |= ON_MONITOR_ADD_EN;
5124 data |= ON_MONITOR_ADD(0x96);
5126 WREG32(CGTS_SM_CTRL_REG, data);
5128 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5131 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5133 data = RREG32(RLC_MEM_SLP_CNTL);
5134 if (data & RLC_MEM_LS_EN) {
5135 data &= ~RLC_MEM_LS_EN;
5136 WREG32(RLC_MEM_SLP_CNTL, data);
5139 data = RREG32(CP_MEM_SLP_CNTL);
5140 if (data & CP_MEM_LS_EN) {
5141 data &= ~CP_MEM_LS_EN;
5142 WREG32(CP_MEM_SLP_CNTL, data);
5145 orig = data = RREG32(CGTS_SM_CTRL_REG);
5146 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5148 WREG32(CGTS_SM_CTRL_REG, data);
5150 tmp = cik_halt_rlc(rdev);
5152 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5153 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5154 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5155 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5156 WREG32(RLC_SERDES_WR_CTRL, data);
5158 cik_update_rlc(rdev, tmp);
5162 static const u32 mc_cg_registers[] =
5175 static void cik_enable_mc_ls(struct radeon_device *rdev,
5181 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5182 orig = data = RREG32(mc_cg_registers[i]);
5184 data |= MC_LS_ENABLE;
5186 data &= ~MC_LS_ENABLE;
5188 WREG32(mc_cg_registers[i], data);
5192 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5198 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5199 orig = data = RREG32(mc_cg_registers[i]);
5201 data |= MC_CG_ENABLE;
5203 data &= ~MC_CG_ENABLE;
5205 WREG32(mc_cg_registers[i], data);
5209 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5215 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5216 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5218 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5221 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5223 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5226 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5230 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5236 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5239 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5241 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5244 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5246 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5249 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5251 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5254 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5258 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5264 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5266 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5268 orig = data = RREG32(UVD_CGC_CTRL);
5271 WREG32(UVD_CGC_CTRL, data);
5273 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5275 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5277 orig = data = RREG32(UVD_CGC_CTRL);
5280 WREG32(UVD_CGC_CTRL, data);
5284 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5289 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5292 data &= ~CLOCK_GATING_DIS;
5294 data |= CLOCK_GATING_DIS;
5297 WREG32(HDP_HOST_PATH_CNTL, data);
5300 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5305 orig = data = RREG32(HDP_MEM_POWER_LS);
5308 data |= HDP_LS_ENABLE;
5310 data &= ~HDP_LS_ENABLE;
5313 WREG32(HDP_MEM_POWER_LS, data);
5316 void cik_update_cg(struct radeon_device *rdev,
5317 u32 block, bool enable)
5319 if (block & RADEON_CG_BLOCK_GFX) {
5320 /* order matters! */
5322 cik_enable_mgcg(rdev, true);
5323 cik_enable_cgcg(rdev, true);
5325 cik_enable_cgcg(rdev, false);
5326 cik_enable_mgcg(rdev, false);
5330 if (block & RADEON_CG_BLOCK_MC) {
5331 if (!(rdev->flags & RADEON_IS_IGP)) {
5332 cik_enable_mc_mgcg(rdev, enable);
5333 cik_enable_mc_ls(rdev, enable);
5337 if (block & RADEON_CG_BLOCK_SDMA) {
5338 cik_enable_sdma_mgcg(rdev, enable);
5339 cik_enable_sdma_mgls(rdev, enable);
5342 if (block & RADEON_CG_BLOCK_UVD) {
5344 cik_enable_uvd_mgcg(rdev, enable);
5347 if (block & RADEON_CG_BLOCK_HDP) {
5348 cik_enable_hdp_mgcg(rdev, enable);
5349 cik_enable_hdp_ls(rdev, enable);
5353 static void cik_init_cg(struct radeon_device *rdev)
5356 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */
5359 si_init_uvd_internal_cg(rdev);
5361 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5362 RADEON_CG_BLOCK_SDMA |
5363 RADEON_CG_BLOCK_UVD |
5364 RADEON_CG_BLOCK_HDP), true);
5367 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5372 orig = data = RREG32(RLC_PG_CNTL);
5374 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5376 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5378 WREG32(RLC_PG_CNTL, data);
5381 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5386 orig = data = RREG32(RLC_PG_CNTL);
5388 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5390 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5392 WREG32(RLC_PG_CNTL, data);
5395 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5399 orig = data = RREG32(RLC_PG_CNTL);
5401 data &= ~DISABLE_CP_PG;
5403 data |= DISABLE_CP_PG;
5405 WREG32(RLC_PG_CNTL, data);
5408 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5412 orig = data = RREG32(RLC_PG_CNTL);
5414 data &= ~DISABLE_GDS_PG;
5416 data |= DISABLE_GDS_PG;
5418 WREG32(RLC_PG_CNTL, data);
5421 #define CP_ME_TABLE_SIZE 96
5422 #define CP_ME_TABLE_OFFSET 2048
5423 #define CP_MEC_TABLE_OFFSET 4096
5425 void cik_init_cp_pg_table(struct radeon_device *rdev)
5427 const __be32 *fw_data;
5428 volatile u32 *dst_ptr;
5429 int me, i, max_me = 4;
5433 if (rdev->family == CHIP_KAVERI)
5436 if (rdev->rlc.cp_table_ptr == NULL)
5439 /* write the cp table buffer */
5440 dst_ptr = rdev->rlc.cp_table_ptr;
5441 for (me = 0; me < max_me; me++) {
5443 fw_data = (const __be32 *)rdev->ce_fw->data;
5444 table_offset = CP_ME_TABLE_OFFSET;
5445 } else if (me == 1) {
5446 fw_data = (const __be32 *)rdev->pfp_fw->data;
5447 table_offset = CP_ME_TABLE_OFFSET;
5448 } else if (me == 2) {
5449 fw_data = (const __be32 *)rdev->me_fw->data;
5450 table_offset = CP_ME_TABLE_OFFSET;
5452 fw_data = (const __be32 *)rdev->mec_fw->data;
5453 table_offset = CP_MEC_TABLE_OFFSET;
5456 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
5457 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5459 bo_offset += CP_ME_TABLE_SIZE;
5463 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5469 orig = data = RREG32(RLC_PG_CNTL);
5470 data |= GFX_PG_ENABLE;
5472 WREG32(RLC_PG_CNTL, data);
5474 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5477 WREG32(RLC_AUTO_PG_CTRL, data);
5479 orig = data = RREG32(RLC_PG_CNTL);
5480 data &= ~GFX_PG_ENABLE;
5482 WREG32(RLC_PG_CNTL, data);
5484 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5485 data &= ~AUTO_PG_EN;
5487 WREG32(RLC_AUTO_PG_CTRL, data);
5489 data = RREG32(DB_RENDER_CONTROL);
5493 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5495 u32 mask = 0, tmp, tmp1;
5498 cik_select_se_sh(rdev, se, sh);
5499 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5500 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5501 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5508 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5513 return (~tmp) & mask;
5516 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5518 u32 i, j, k, active_cu_number = 0;
5519 u32 mask, counter, cu_bitmap;
5522 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5523 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5527 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5528 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5536 active_cu_number += counter;
5537 tmp |= (cu_bitmap << (i * 16 + j * 8));
5541 WREG32(RLC_PG_AO_CU_MASK, tmp);
5543 tmp = RREG32(RLC_MAX_PG_CU);
5544 tmp &= ~MAX_PU_CU_MASK;
5545 tmp |= MAX_PU_CU(active_cu_number);
5546 WREG32(RLC_MAX_PG_CU, tmp);
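
/*
 * Illustrative sketch (not used by the driver): the always-on CU mask built
 * above packs one byte of CU bits per shader array and two shader arrays
 * per shader engine, which is where the (se * 16 + sh * 8) shift comes from.
 * The helper name is made up purely for illustration.
 */
static inline u32 cik_example_ao_cu_shift(u32 se, u32 sh)
{
	return se * 16 + sh * 8;	/* 8 CU bits per SH, 2 SHs per SE */
}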
5549 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5554 orig = data = RREG32(RLC_PG_CNTL);
5556 data |= STATIC_PER_CU_PG_ENABLE;
5558 data &= ~STATIC_PER_CU_PG_ENABLE;
5560 WREG32(RLC_PG_CNTL, data);
5563 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5568 orig = data = RREG32(RLC_PG_CNTL);
5570 data |= DYN_PER_CU_PG_ENABLE;
5572 data &= ~DYN_PER_CU_PG_ENABLE;
5574 WREG32(RLC_PG_CNTL, data);
5577 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5578 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
5580 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5585 if (rdev->rlc.cs_data) {
5586 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5587 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5588 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
5589 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
5591 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5592 for (i = 0; i < 3; i++)
5593 WREG32(RLC_GPM_SCRATCH_DATA, 0);
5595 if (rdev->rlc.reg_list) {
5596 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5597 for (i = 0; i < rdev->rlc.reg_list_size; i++)
5598 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5601 orig = data = RREG32(RLC_PG_CNTL);
5604 WREG32(RLC_PG_CNTL, data);
5606 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5607 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5609 data = RREG32(CP_RB_WPTR_POLL_CNTL);
5610 data &= ~IDLE_POLL_COUNT_MASK;
5611 data |= IDLE_POLL_COUNT(0x60);
5612 WREG32(CP_RB_WPTR_POLL_CNTL, data);
5615 WREG32(RLC_PG_DELAY, data);
5617 data = RREG32(RLC_PG_DELAY_2);
5620 WREG32(RLC_PG_DELAY_2, data);
5622 data = RREG32(RLC_AUTO_PG_CTRL);
5623 data &= ~GRBM_REG_SGIT_MASK;
5624 data |= GRBM_REG_SGIT(0x700);
5625 WREG32(RLC_AUTO_PG_CTRL, data);
5629 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5631 bool has_pg = false;
5632 bool has_dyn_mgpg = false;
5633 bool has_static_mgpg = false;
5635 /* only APUs have PG */
5636 if (rdev->flags & RADEON_IS_IGP) {
5638 has_static_mgpg = true;
5639 if (rdev->family == CHIP_KAVERI)
5640 has_dyn_mgpg = true;
5644 cik_enable_gfx_cgpg(rdev, enable);
5646 cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
5647 cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
5649 cik_enable_gfx_static_mgpg(rdev, false);
5650 cik_enable_gfx_dynamic_mgpg(rdev, false);
5656 void cik_init_pg(struct radeon_device *rdev)
5658 bool has_pg = false;
5660 /* only APUs have PG */
5661 if (rdev->flags & RADEON_IS_IGP) {
5662 /* XXX disable this for now */
5663 /* has_pg = true; */
5667 cik_enable_sck_slowdown_on_pu(rdev, true);
5668 cik_enable_sck_slowdown_on_pd(rdev, true);
5669 cik_init_gfx_cgpg(rdev);
5670 cik_enable_cp_pg(rdev, true);
5671 cik_enable_gds_pg(rdev, true);
5672 cik_init_ao_cu_mask(rdev);
5673 cik_update_gfx_pg(rdev, true);
5679 * Starting with r6xx, interrupts are handled via a ring buffer.
5680 * Ring buffers are areas of GPU accessible memory that the GPU
5681 * writes interrupt vectors into and the host reads vectors out of.
5682 * There is a rptr (read pointer) that determines where the
5683 * host is currently reading, and a wptr (write pointer)
5684 * which determines where the GPU has written. When the
5685 * pointers are equal, the ring is idle. When the GPU
5686 * writes vectors to the ring buffer, it increments the
5687 * wptr. When there is an interrupt, the host then starts
5688 * fetching commands and processing them until the pointers are
5689 * equal again at which point it updates the rptr.
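
/*
 * Illustrative sketch (not used by the driver): the consumer side of the
 * ring described above.  The host walks from rptr to wptr in 16-byte
 * vectors, decodes and handles each one, and finally publishes the new rptr
 * so the ring space can be reused.  This only restates the shape of
 * cik_irq_process() below; the callback and helper names are made up.
 */
static inline void cik_example_ih_drain(u32 *rptr, u32 wptr, u32 ptr_mask,
					void (*handle_vector)(u32 ring_index))
{
	while (*rptr != wptr) {
		handle_vector(*rptr / 4);		/* dword index of the vector */
		*rptr = (*rptr + 16) & ptr_mask;	/* next 128-bit vector, with wrap */
	}
}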
5693 * cik_enable_interrupts - Enable the interrupt ring buffer
5695 * @rdev: radeon_device pointer
5697 * Enable the interrupt ring buffer (CIK).
5699 static void cik_enable_interrupts(struct radeon_device *rdev)
5701 u32 ih_cntl = RREG32(IH_CNTL);
5702 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5704 ih_cntl |= ENABLE_INTR;
5705 ih_rb_cntl |= IH_RB_ENABLE;
5706 WREG32(IH_CNTL, ih_cntl);
5707 WREG32(IH_RB_CNTL, ih_rb_cntl);
5708 rdev->ih.enabled = true;
5712 * cik_disable_interrupts - Disable the interrupt ring buffer
5714 * @rdev: radeon_device pointer
5716 * Disable the interrupt ring buffer (CIK).
5718 static void cik_disable_interrupts(struct radeon_device *rdev)
5720 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5721 u32 ih_cntl = RREG32(IH_CNTL);
5723 ih_rb_cntl &= ~IH_RB_ENABLE;
5724 ih_cntl &= ~ENABLE_INTR;
5725 WREG32(IH_RB_CNTL, ih_rb_cntl);
5726 WREG32(IH_CNTL, ih_cntl);
5727 /* set rptr, wptr to 0 */
5728 WREG32(IH_RB_RPTR, 0);
5729 WREG32(IH_RB_WPTR, 0);
5730 rdev->ih.enabled = false;
5735 * cik_disable_interrupt_state - Disable all interrupt sources
5737 * @rdev: radeon_device pointer
5739 * Clear all interrupt enable bits used by the driver (CIK).
5741 static void cik_disable_interrupt_state(struct radeon_device *rdev)
5746 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5748 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5749 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5750 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5751 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5752 /* compute queues */
5753 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5754 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5755 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5756 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5757 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5758 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5759 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5760 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5762 WREG32(GRBM_INT_CNTL, 0);
5763 /* vline/vblank, etc. */
5764 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5765 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5766 if (rdev->num_crtc >= 4) {
5767 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5768 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5770 if (rdev->num_crtc >= 6) {
5771 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5772 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5776 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5778 /* digital hotplug */
5779 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5780 WREG32(DC_HPD1_INT_CONTROL, tmp);
5781 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5782 WREG32(DC_HPD2_INT_CONTROL, tmp);
5783 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5784 WREG32(DC_HPD3_INT_CONTROL, tmp);
5785 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5786 WREG32(DC_HPD4_INT_CONTROL, tmp);
5787 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5788 WREG32(DC_HPD5_INT_CONTROL, tmp);
5789 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5790 WREG32(DC_HPD6_INT_CONTROL, tmp);
5795 * cik_irq_init - init and enable the interrupt ring
5797 * @rdev: radeon_device pointer
5799 * Allocate a ring buffer for the interrupt controller,
5800 * enable the RLC, disable interrupts, enable the IH
5801 * ring buffer and enable it (CIK).
5802 * Called at device load and resume.
5803 * Returns 0 for success, errors for failure.
5805 static int cik_irq_init(struct radeon_device *rdev)
5809 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5812 ret = r600_ih_ring_alloc(rdev);
5817 cik_disable_interrupts(rdev);
5820 ret = cik_rlc_resume(rdev);
5822 r600_ih_ring_fini(rdev);
5826 /* setup interrupt control */
5827 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5828 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5829 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5830 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5831 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5833 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5834 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5835 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5836 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5838 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5839 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5841 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5842 IH_WPTR_OVERFLOW_CLEAR |
5845 if (rdev->wb.enabled)
5846 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5848 /* set the writeback address whether it's enabled or not */
5849 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5850 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5852 WREG32(IH_RB_CNTL, ih_rb_cntl);
5854 /* set rptr, wptr to 0 */
5855 WREG32(IH_RB_RPTR, 0);
5856 WREG32(IH_RB_WPTR, 0);
5858 /* Default settings for IH_CNTL (disabled at first) */
5859 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5860 /* RPTR_REARM only works if msi's are enabled */
5861 if (rdev->msi_enabled)
5862 ih_cntl |= RPTR_REARM;
5863 WREG32(IH_CNTL, ih_cntl);
5865 /* force the active interrupt state to all disabled */
5866 cik_disable_interrupt_state(rdev);
5868 pci_set_master(rdev->pdev);
5871 cik_enable_interrupts(rdev);
5877 * cik_irq_set - enable/disable interrupt sources
5879 * @rdev: radeon_device pointer
5881 * Enable interrupt sources on the GPU (vblanks, hpd,
5883 * Returns 0 for success, errors for failure.
5885 int cik_irq_set(struct radeon_device *rdev)
5887 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5888 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5889 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5890 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5891 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5892 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5893 u32 grbm_int_cntl = 0;
5894 u32 dma_cntl, dma_cntl1;
5897 if (!rdev->irq.installed) {
5898 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5901 /* don't enable anything if the ih is disabled */
5902 if (!rdev->ih.enabled) {
5903 cik_disable_interrupts(rdev);
5904 /* force the active interrupt state to all disabled */
5905 cik_disable_interrupt_state(rdev);
5909 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5910 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5911 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5912 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5913 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5914 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5916 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5917 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5919 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5920 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5921 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5922 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5923 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5924 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5925 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5926 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5928 if (rdev->flags & RADEON_IS_IGP)
5929 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
5930 ~(THERM_INTH_MASK | THERM_INTL_MASK);
5932 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
5933 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5935 /* enable CP interrupts on all rings */
5936 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5937 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5938 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5940 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5941 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5942 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5943 if (ring->me == 1) {
5944 switch (ring->pipe) {
5946 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5949 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5952 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5955 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5958 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5961 } else if (ring->me == 2) {
5962 switch (ring->pipe) {
5964 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5967 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5970 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5973 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5976 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5980 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5983 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5984 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5985 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5986 if (ring->me == 1) {
5987 switch (ring->pipe) {
5989 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5992 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5995 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5998 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6001 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6004 } else if (ring->me == 2) {
6005 switch (ring->pipe) {
6007 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6010 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6013 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6016 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6019 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6023 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6027 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6028 DRM_DEBUG("cik_irq_set: sw int dma\n");
6029 dma_cntl |= TRAP_ENABLE;
6032 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6033 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6034 dma_cntl1 |= TRAP_ENABLE;
6037 if (rdev->irq.crtc_vblank_int[0] ||
6038 atomic_read(&rdev->irq.pflip[0])) {
6039 DRM_DEBUG("cik_irq_set: vblank 0\n");
6040 crtc1 |= VBLANK_INTERRUPT_MASK;
6042 if (rdev->irq.crtc_vblank_int[1] ||
6043 atomic_read(&rdev->irq.pflip[1])) {
6044 DRM_DEBUG("cik_irq_set: vblank 1\n");
6045 crtc2 |= VBLANK_INTERRUPT_MASK;
6047 if (rdev->irq.crtc_vblank_int[2] ||
6048 atomic_read(&rdev->irq.pflip[2])) {
6049 DRM_DEBUG("cik_irq_set: vblank 2\n");
6050 crtc3 |= VBLANK_INTERRUPT_MASK;
6052 if (rdev->irq.crtc_vblank_int[3] ||
6053 atomic_read(&rdev->irq.pflip[3])) {
6054 DRM_DEBUG("cik_irq_set: vblank 3\n");
6055 crtc4 |= VBLANK_INTERRUPT_MASK;
6057 if (rdev->irq.crtc_vblank_int[4] ||
6058 atomic_read(&rdev->irq.pflip[4])) {
6059 DRM_DEBUG("cik_irq_set: vblank 4\n");
6060 crtc5 |= VBLANK_INTERRUPT_MASK;
6062 if (rdev->irq.crtc_vblank_int[5] ||
6063 atomic_read(&rdev->irq.pflip[5])) {
6064 DRM_DEBUG("cik_irq_set: vblank 5\n");
6065 crtc6 |= VBLANK_INTERRUPT_MASK;
6067 if (rdev->irq.hpd[0]) {
6068 DRM_DEBUG("cik_irq_set: hpd 1\n");
6069 hpd1 |= DC_HPDx_INT_EN;
6071 if (rdev->irq.hpd[1]) {
6072 DRM_DEBUG("cik_irq_set: hpd 2\n");
6073 hpd2 |= DC_HPDx_INT_EN;
6075 if (rdev->irq.hpd[2]) {
6076 DRM_DEBUG("cik_irq_set: hpd 3\n");
6077 hpd3 |= DC_HPDx_INT_EN;
6079 if (rdev->irq.hpd[3]) {
6080 DRM_DEBUG("cik_irq_set: hpd 4\n");
6081 hpd4 |= DC_HPDx_INT_EN;
6083 if (rdev->irq.hpd[4]) {
6084 DRM_DEBUG("cik_irq_set: hpd 5\n");
6085 hpd5 |= DC_HPDx_INT_EN;
6087 if (rdev->irq.hpd[5]) {
6088 DRM_DEBUG("cik_irq_set: hpd 6\n");
6089 hpd6 |= DC_HPDx_INT_EN;
6092 if (rdev->irq.dpm_thermal) {
6093 DRM_DEBUG("dpm thermal\n");
6094 if (rdev->flags & RADEON_IS_IGP)
6095 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6097 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6100 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6102 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6103 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6105 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6106 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6107 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6108 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6109 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6110 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6111 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6112 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6114 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6116 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6117 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6118 if (rdev->num_crtc >= 4) {
6119 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6120 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6122 if (rdev->num_crtc >= 6) {
6123 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6124 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6127 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6128 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6129 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6130 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6131 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6132 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6134 if (rdev->flags & RADEON_IS_IGP)
6135 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6137 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6143 * cik_irq_ack - ack interrupt sources
6145 * @rdev: radeon_device pointer
6147 * Ack interrupt sources on the GPU (vblanks, hpd,
6148 * etc.) (CIK). Certain interrupt sources are sw
6149 * generated and do not require an explicit ack.
6151 static inline void cik_irq_ack(struct radeon_device *rdev)
6155 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6156 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6157 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6158 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6159 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6160 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6161 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6163 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6164 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6165 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6166 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6167 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6168 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6169 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6170 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6172 if (rdev->num_crtc >= 4) {
6173 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6174 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6175 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6176 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6177 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6178 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6179 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6180 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6183 if (rdev->num_crtc >= 6) {
6184 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6185 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6186 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6187 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6188 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6189 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6190 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6191 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6194 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6195 tmp = RREG32(DC_HPD1_INT_CONTROL);
6196 tmp |= DC_HPDx_INT_ACK;
6197 WREG32(DC_HPD1_INT_CONTROL, tmp);
6199 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6200 tmp = RREG32(DC_HPD2_INT_CONTROL);
6201 tmp |= DC_HPDx_INT_ACK;
6202 WREG32(DC_HPD2_INT_CONTROL, tmp);
6204 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6205 tmp = RREG32(DC_HPD3_INT_CONTROL);
6206 tmp |= DC_HPDx_INT_ACK;
6207 WREG32(DC_HPD3_INT_CONTROL, tmp);
6209 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6210 tmp = RREG32(DC_HPD4_INT_CONTROL);
6211 tmp |= DC_HPDx_INT_ACK;
6212 WREG32(DC_HPD4_INT_CONTROL, tmp);
6214 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6215 tmp = RREG32(DC_HPD5_INT_CONTROL);
6216 tmp |= DC_HPDx_INT_ACK;
6217 WREG32(DC_HPD5_INT_CONTROL, tmp);
6219 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6220 tmp = RREG32(DC_HPD6_INT_CONTROL);
6221 tmp |= DC_HPDx_INT_ACK;
6222 WREG32(DC_HPD6_INT_CONTROL, tmp);
6227 * cik_irq_disable - disable interrupts
6229 * @rdev: radeon_device pointer
6231 * Disable interrupts on the hw (CIK).
6233 static void cik_irq_disable(struct radeon_device *rdev)
6235 cik_disable_interrupts(rdev);
6236 /* Wait and acknowledge irq */
6239 cik_disable_interrupt_state(rdev);
6243 * cik_irq_suspend - disable interrupts for suspend
6245 * @rdev: radeon_device pointer
6247 * Disable interrupts and stop the RLC (CIK).
6250 static void cik_irq_suspend(struct radeon_device *rdev)
6252 cik_irq_disable(rdev);
6257 * cik_irq_fini - tear down interrupt support
6259 * @rdev: radeon_device pointer
6261 * Disable interrupts on the hw and free the IH ring
6263 * Used for driver unload.
6265 static void cik_irq_fini(struct radeon_device *rdev)
6267 cik_irq_suspend(rdev);
6268 r600_ih_ring_fini(rdev);
6272 * cik_get_ih_wptr - get the IH ring buffer wptr
6274 * @rdev: radeon_device pointer
6276 * Get the IH ring buffer wptr from either the register
6277 * or the writeback memory buffer (CIK). Also check for
6278 * ring buffer overflow and deal with it.
6279 * Used by cik_irq_process().
6280 * Returns the value of the wptr.
6282 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6286 if (rdev->wb.enabled)
6287 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6289 wptr = RREG32(IH_RB_WPTR);
6291 if (wptr & RB_OVERFLOW) {
6292 /* When a ring buffer overflow happens, start parsing interrupts
6293 * from the last vector that was not overwritten (wptr + 16).
6294 * Hopefully this should allow us to catch up.
6296 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6297 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6298 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6299 tmp = RREG32(IH_RB_CNTL);
6300 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6301 WREG32(IH_RB_CNTL, tmp);
6303 return (wptr & rdev->ih.ptr_mask);
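
/*
 * Illustrative sketch (an assumption, not driver code): the masking above
 * works because the IH ring size is a power of two, so ptr_mask is simply
 * ring_size - 1 and "ptr & ptr_mask" wraps a byte offset back into the ring.
 * The helper name is made up purely for illustration.
 */
static inline u32 cik_example_ih_wrap(u32 ptr, u32 ring_size_bytes)
{
	return ptr & (ring_size_bytes - 1);	/* assumes a power-of-two ring size */
}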
6307 * Each IV ring entry is 128 bits:
6308 * [7:0] - interrupt source id
6310 * [59:32] - interrupt source data
6311 * [63:60] - reserved
6314 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6315 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6316 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6317 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6318 * PIPE_ID - ME0 0=3D
6319 * - ME1&2 compute dispatcher (4 pipes each)
6321 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6322 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6323 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6326 * [127:96] - reserved
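
/*
 * Illustrative sketch (not used by the driver): decoding one IV ring entry
 * with the layout documented above.  The entry is read as little-endian
 * dwords; src_id, src_data and ring_id match what cik_irq_process() extracts
 * below.  The struct and helper names are made up for illustration.
 */
struct cik_example_iv_entry {
	u32 src_id;	/* [7:0] interrupt source id */
	u32 src_data;	/* [59:32] interrupt source data */
	u32 ring_id;	/* ME_ID/PIPE_ID/QUEUE_ID or INSTANCE_ID/QUEUE_ID */
};

static inline void cik_example_decode_iv(const volatile u32 *ring, u32 ring_index,
					 struct cik_example_iv_entry *e)
{
	e->src_id = le32_to_cpu(ring[ring_index]) & 0xff;
	e->src_data = le32_to_cpu(ring[ring_index + 1]) & 0xfffffff;
	e->ring_id = le32_to_cpu(ring[ring_index + 2]) & 0xff;
}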
6329 * cik_irq_process - interrupt handler
6331 * @rdev: radeon_device pointer
6333 * Interrupt handler (CIK). Walk the IH ring,
6334 * ack interrupts and schedule work to handle
6336 * Returns irq process return code.
6338 int cik_irq_process(struct radeon_device *rdev)
6340 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6341 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6344 u32 src_id, src_data, ring_id;
6345 u8 me_id, pipe_id, queue_id;
6347 bool queue_hotplug = false;
6348 bool queue_reset = false;
6349 u32 addr, status, mc_client;
6350 bool queue_thermal = false;
6352 if (!rdev->ih.enabled || rdev->shutdown)
6355 wptr = cik_get_ih_wptr(rdev);
6358 /* is somebody else already processing irqs? */
6359 if (atomic_xchg(&rdev->ih.lock, 1))
6362 rptr = rdev->ih.rptr;
6363 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6365 /* Order reading of wptr vs. reading of IH ring data */
6368 /* display interrupts */
6371 while (rptr != wptr) {
6372 /* wptr/rptr are in bytes! */
6373 ring_index = rptr / 4;
6374 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6375 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6376 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6379 case 1: /* D1 vblank/vline */
6381 case 0: /* D1 vblank */
6382 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6383 if (rdev->irq.crtc_vblank_int[0]) {
6384 drm_handle_vblank(rdev->ddev, 0);
6385 rdev->pm.vblank_sync = true;
6386 wake_up(&rdev->irq.vblank_queue);
6388 if (atomic_read(&rdev->irq.pflip[0]))
6389 radeon_crtc_handle_flip(rdev, 0);
6390 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6391 DRM_DEBUG("IH: D1 vblank\n");
6394 case 1: /* D1 vline */
6395 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6396 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6397 DRM_DEBUG("IH: D1 vline\n");
6401 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6405 case 2: /* D2 vblank/vline */
6407 case 0: /* D2 vblank */
6408 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6409 if (rdev->irq.crtc_vblank_int[1]) {
6410 drm_handle_vblank(rdev->ddev, 1);
6411 rdev->pm.vblank_sync = true;
6412 wake_up(&rdev->irq.vblank_queue);
6414 if (atomic_read(&rdev->irq.pflip[1]))
6415 radeon_crtc_handle_flip(rdev, 1);
6416 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6417 DRM_DEBUG("IH: D2 vblank\n");
6420 case 1: /* D2 vline */
6421 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6422 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6423 DRM_DEBUG("IH: D2 vline\n");
6427 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6431 case 3: /* D3 vblank/vline */
6433 case 0: /* D3 vblank */
6434 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6435 if (rdev->irq.crtc_vblank_int[2]) {
6436 drm_handle_vblank(rdev->ddev, 2);
6437 rdev->pm.vblank_sync = true;
6438 wake_up(&rdev->irq.vblank_queue);
6440 if (atomic_read(&rdev->irq.pflip[2]))
6441 radeon_crtc_handle_flip(rdev, 2);
6442 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6443 DRM_DEBUG("IH: D3 vblank\n");
6446 case 1: /* D3 vline */
6447 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6448 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6449 DRM_DEBUG("IH: D3 vline\n");
6453 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6457 case 4: /* D4 vblank/vline */
6459 case 0: /* D4 vblank */
6460 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6461 if (rdev->irq.crtc_vblank_int[3]) {
6462 drm_handle_vblank(rdev->ddev, 3);
6463 rdev->pm.vblank_sync = true;
6464 wake_up(&rdev->irq.vblank_queue);
6466 if (atomic_read(&rdev->irq.pflip[3]))
6467 radeon_crtc_handle_flip(rdev, 3);
6468 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6469 DRM_DEBUG("IH: D4 vblank\n");
6472 case 1: /* D4 vline */
6473 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6474 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6475 DRM_DEBUG("IH: D4 vline\n");
6479 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6483 case 5: /* D5 vblank/vline */
6485 case 0: /* D5 vblank */
6486 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6487 if (rdev->irq.crtc_vblank_int[4]) {
6488 drm_handle_vblank(rdev->ddev, 4);
6489 rdev->pm.vblank_sync = true;
6490 wake_up(&rdev->irq.vblank_queue);
6492 if (atomic_read(&rdev->irq.pflip[4]))
6493 radeon_crtc_handle_flip(rdev, 4);
6494 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6495 DRM_DEBUG("IH: D5 vblank\n");
6498 case 1: /* D5 vline */
6499 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6500 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6501 DRM_DEBUG("IH: D5 vline\n");
6505 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6509 case 6: /* D6 vblank/vline */
6511 case 0: /* D6 vblank */
6512 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6513 if (rdev->irq.crtc_vblank_int[5]) {
6514 drm_handle_vblank(rdev->ddev, 5);
6515 rdev->pm.vblank_sync = true;
6516 wake_up(&rdev->irq.vblank_queue);
6518 if (atomic_read(&rdev->irq.pflip[5]))
6519 radeon_crtc_handle_flip(rdev, 5);
6520 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6521 DRM_DEBUG("IH: D6 vblank\n");
6524 case 1: /* D6 vline */
6525 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6526 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6527 DRM_DEBUG("IH: D6 vline\n");
6531 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6535 case 42: /* HPD hotplug */
6538 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6539 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6540 queue_hotplug = true;
6541 DRM_DEBUG("IH: HPD1\n");
6545 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6546 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6547 queue_hotplug = true;
6548 DRM_DEBUG("IH: HPD2\n");
6552 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6553 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6554 queue_hotplug = true;
6555 DRM_DEBUG("IH: HPD3\n");
6559 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6560 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6561 queue_hotplug = true;
6562 DRM_DEBUG("IH: HPD4\n");
6566 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6567 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6568 queue_hotplug = true;
6569 DRM_DEBUG("IH: HPD5\n");
6573 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6574 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6575 queue_hotplug = true;
6576 DRM_DEBUG("IH: HPD6\n");
6580 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6586 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6587 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6588 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6589 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6590 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
6592 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6594 cik_vm_decode_fault(rdev, status, addr, mc_client);
6595 /* reset addr and status */
6596 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6598 case 176: /* GFX RB CP_INT */
6599 case 177: /* GFX IB CP_INT */
6600 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6602 case 181: /* CP EOP event */
6603 DRM_DEBUG("IH: CP EOP\n");
6604 /* XXX check the bitfield order! */
6605 me_id = (ring_id & 0x60) >> 5;
6606 pipe_id = (ring_id & 0x18) >> 3;
6607 queue_id = (ring_id & 0x7) >> 0;
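/* for EOP events, me_id 0 maps to the gfx ring while me_ids 1 and 2 are the
 * compute MECs; pipe and queue select the HQD within a MEC, so the compute
 * rings below are matched on me and pipe.
 */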
6610 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6614 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
6615 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6616 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
6617 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6621 case 184: /* CP Privileged reg access */
6622 DRM_ERROR("Illegal register access in command stream\n");
6623 /* XXX check the bitfield order! */
6624 me_id = (ring_id & 0x60) >> 5;
6625 pipe_id = (ring_id & 0x18) >> 3;
6626 queue_id = (ring_id & 0x7) >> 0;
6629 /* This results in a full GPU reset, but all we need to do is soft
6630 * reset the CP for gfx
6644 case 185: /* CP Privileged inst */
6645 DRM_ERROR("Illegal instruction in command stream\n");
6646 /* XXX check the bitfield order! */
6647 me_id = (ring_id & 0x60) >> 5;
6648 pipe_id = (ring_id & 0x18) >> 3;
6649 queue_id = (ring_id & 0x7) >> 0;
6652 /* This results in a full GPU reset, but all we need to do is soft
6653 * reset the CP for gfx
6667 case 224: /* SDMA trap event */
6668 /* XXX check the bitfield order! */
6669 me_id = (ring_id & 0x3) >> 0;
6670 queue_id = (ring_id & 0xc) >> 2;
6671 DRM_DEBUG("IH: SDMA trap\n");
6676 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6689 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6701 case 230: /* thermal low to high */
6702 DRM_DEBUG("IH: thermal low to high\n");
6703 rdev->pm.dpm.thermal.high_to_low = false;
6704 queue_thermal = true;
6706 case 231: /* thermal high to low */
6707 DRM_DEBUG("IH: thermal high to low\n");
6708 rdev->pm.dpm.thermal.high_to_low = true;
6709 queue_thermal = true;
6711 case 233: /* GUI IDLE */
6712 DRM_DEBUG("IH: GUI idle\n");
6714 case 241: /* SDMA Privileged inst */
6715 case 247: /* SDMA Privileged inst */
6716 DRM_ERROR("Illegal instruction in SDMA command stream\n");
6717 /* XXX check the bitfield order! */
6718 me_id = (ring_id & 0x3) >> 0;
6719 queue_id = (ring_id & 0xc) >> 2;
6754 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6758 /* wptr/rptr are in bytes! */
6760 rptr &= rdev->ih.ptr_mask;
6763 schedule_work(&rdev->hotplug_work);
6765 schedule_work(&rdev->reset_work);
6767 schedule_work(&rdev->pm.dpm.thermal.work);
6768 rdev->ih.rptr = rptr;
6769 WREG32(IH_RB_RPTR, rdev->ih.rptr);
6770 atomic_set(&rdev->ih.lock, 0);
6772 /* make sure wptr hasn't changed while processing */
6773 wptr = cik_get_ih_wptr(rdev);
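/* if new entries were queued while the ring was being drained, the wptr read
 * back here will have advanced past rptr and the full handler loops back to
 * process them before returning.
 */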
6781 * startup/shutdown callbacks
6784 * cik_startup - program the asic to a functional state
6786 * @rdev: radeon_device pointer
6788 * Programs the asic to a functional state (CIK).
6789 * Called by cik_init() and cik_resume().
6790 * Returns 0 for success, error for failure.
6792 static int cik_startup(struct radeon_device *rdev)
6794 struct radeon_ring *ring;
6797 /* enable pcie gen2/3 link */
6798 cik_pcie_gen3_enable(rdev);
6800 cik_program_aspm(rdev);
6802 cik_mc_program(rdev);
6804 if (rdev->flags & RADEON_IS_IGP) {
6805 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6806 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6807 r = cik_init_microcode(rdev);
6809 DRM_ERROR("Failed to load firmware!\n");
6814 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6815 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
6817 r = cik_init_microcode(rdev);
6819 DRM_ERROR("Failed to load firmware!\n");
6824 r = ci_mc_load_microcode(rdev);
6826 DRM_ERROR("Failed to load MC firmware!\n");
6831 r = r600_vram_scratch_init(rdev);
6835 r = cik_pcie_gart_enable(rdev);
6840 /* allocate rlc buffers */
6841 if (rdev->flags & RADEON_IS_IGP) {
6842 if (rdev->family == CHIP_KAVERI) {
6843 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
6844 rdev->rlc.reg_list_size =
6845 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
6847 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
6848 rdev->rlc.reg_list_size =
6849 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
6852 rdev->rlc.cs_data = ci_cs_data;
6853 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
6854 r = sumo_rlc_init(rdev);
6856 DRM_ERROR("Failed to init rlc BOs!\n");
6860 /* allocate wb buffer */
6861 r = radeon_wb_init(rdev);
6865 /* allocate mec buffers */
6866 r = cik_mec_init(rdev);
6868 DRM_ERROR("Failed to init MEC BOs!\n");
6872 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6874 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6878 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6880 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6884 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6886 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6890 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6892 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6896 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6898 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6902 r = uvd_v4_2_resume(rdev);
6904 r = radeon_fence_driver_start_ring(rdev,
6905 R600_RING_TYPE_UVD_INDEX);
6907 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6910 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6913 if (!rdev->irq.installed) {
6914 r = radeon_irq_kms_init(rdev);
6919 r = cik_irq_init(rdev);
6921 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6922 radeon_irq_kms_fini(rdev);
6927 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6928 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6929 CP_RB0_RPTR, CP_RB0_WPTR,
6934 /* set up the compute queues */
6935 /* type-2 packets are deprecated on MEC, use type-3 instead */
6936 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6937 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6938 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6939 PACKET3(PACKET3_NOP, 0x3FFF));
6942 ring->me = 1; /* first MEC */
6943 ring->pipe = 0; /* first pipe */
6944 ring->queue = 0; /* first queue */
6945 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6947 /* type-2 packets are deprecated on MEC, use type-3 instead */
6948 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6949 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6950 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6951 PACKET3(PACKET3_NOP, 0x3FFF));
6954 /* dGPU only have 1 MEC */
6955 ring->me = 1; /* first MEC */
6956 ring->pipe = 0; /* first pipe */
6957 ring->queue = 1; /* second queue */
6958 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
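/* both compute rings live on MEC 1, pipe 0: CP1 uses queue 0 and CP2 uses
 * queue 1, so they share a pipe but have separate hardware queues.
 */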
6960 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6961 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6962 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6963 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6964 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6968 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6969 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6970 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6971 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6972 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6976 r = cik_cp_resume(rdev);
6980 r = cik_sdma_resume(rdev);
6984 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6985 if (ring->ring_size) {
6986 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6987 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6990 r = uvd_v1_0_init(rdev);
6992 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6995 r = radeon_ib_pool_init(rdev);
6997 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7001 r = radeon_vm_manager_init(rdev);
7003 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7007 r = dce6_audio_init(rdev);
7015 * cik_resume - resume the asic to a functional state
7017 * @rdev: radeon_device pointer
7019 * Programs the asic to a functional state (CIK).
7021 * Returns 0 for success, error for failure.
7023 int cik_resume(struct radeon_device *rdev)
7028 atom_asic_init(rdev->mode_info.atom_context);
7030 /* init golden registers */
7031 cik_init_golden_registers(rdev);
7033 rdev->accel_working = true;
7034 r = cik_startup(rdev);
7036 DRM_ERROR("cik startup failed on resume\n");
7037 rdev->accel_working = false;
7046 * cik_suspend - suspend the asic
7048 * @rdev: radeon_device pointer
7050 * Bring the chip into a state suitable for suspend (CIK).
7051 * Called at suspend.
7052 * Returns 0 for success.
7054 int cik_suspend(struct radeon_device *rdev)
7056 dce6_audio_fini(rdev);
7057 radeon_vm_manager_fini(rdev);
7058 cik_cp_enable(rdev, false);
7059 cik_sdma_enable(rdev, false);
7060 uvd_v1_0_fini(rdev);
7061 radeon_uvd_suspend(rdev);
7062 cik_irq_suspend(rdev);
7063 radeon_wb_disable(rdev);
7064 cik_pcie_gart_disable(rdev);
7068 /* Plan is to move initialization into this function and use
7069 * helper functions so that radeon_device_init does little more
7070 * than call the asic specific functions. This should also allow
7071 * us to remove a bunch of callback functions
7075 * cik_init - asic specific driver and hw init
7077 * @rdev: radeon_device pointer
7079 * Set up asic specific driver variables and program the hw
7080 * to a functional state (CIK).
7081 * Called at driver startup.
7082 * Returns 0 for success, errors for failure.
7084 int cik_init(struct radeon_device *rdev)
7086 struct radeon_ring *ring;
7090 if (!radeon_get_bios(rdev)) {
7091 if (ASIC_IS_AVIVO(rdev))
7094 /* Must be an ATOMBIOS */
7095 if (!rdev->is_atom_bios) {
7096 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7099 r = radeon_atombios_init(rdev);
7103 /* Post card if necessary */
7104 if (!radeon_card_posted(rdev)) {
7106 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7109 DRM_INFO("GPU not posted. posting now...\n");
7110 atom_asic_init(rdev->mode_info.atom_context);
7112 /* init golden registers */
7113 cik_init_golden_registers(rdev);
7114 /* Initialize scratch registers */
7115 cik_scratch_init(rdev);
7116 /* Initialize surface registers */
7117 radeon_surface_init(rdev);
7118 /* Initialize clocks */
7119 radeon_get_clock_info(rdev->ddev);
7122 r = radeon_fence_driver_init(rdev);
7126 /* initialize memory controller */
7127 r = cik_mc_init(rdev);
7130 /* Memory manager */
7131 r = radeon_bo_init(rdev);
7135 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7136 ring->ring_obj = NULL;
7137 r600_ring_init(rdev, ring, 1024 * 1024);
7139 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7140 ring->ring_obj = NULL;
7141 r600_ring_init(rdev, ring, 1024 * 1024);
7142 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7146 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7147 ring->ring_obj = NULL;
7148 r600_ring_init(rdev, ring, 1024 * 1024);
7149 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7153 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7154 ring->ring_obj = NULL;
7155 r600_ring_init(rdev, ring, 256 * 1024);
7157 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7158 ring->ring_obj = NULL;
7159 r600_ring_init(rdev, ring, 256 * 1024);
7161 r = radeon_uvd_init(rdev);
7163 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7164 ring->ring_obj = NULL;
7165 r600_ring_init(rdev, ring, 4096);
7168 rdev->ih.ring_obj = NULL;
7169 r600_ih_ring_init(rdev, 64 * 1024);
7171 r = r600_pcie_gart_init(rdev);
7175 rdev->accel_working = true;
7176 r = cik_startup(rdev);
7178 dev_err(rdev->dev, "disabling GPU acceleration\n");
7180 cik_sdma_fini(rdev);
7182 sumo_rlc_fini(rdev);
7184 radeon_wb_fini(rdev);
7185 radeon_ib_pool_fini(rdev);
7186 radeon_vm_manager_fini(rdev);
7187 radeon_irq_kms_fini(rdev);
7188 cik_pcie_gart_fini(rdev);
7189 rdev->accel_working = false;
7192 /* Don't start up if the MC ucode is missing.
7193 * The default clocks and voltages before the MC ucode
7194 * is loaded are not sufficient for advanced operations.
7196 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7197 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7205 * cik_fini - asic specific driver and hw fini
7207 * @rdev: radeon_device pointer
7209 * Tear down the asic specific driver variables and program the hw
7210 * to an idle state (CIK).
7211 * Called at driver unload.
7213 void cik_fini(struct radeon_device *rdev)
7216 cik_sdma_fini(rdev);
7218 sumo_rlc_fini(rdev);
7220 radeon_wb_fini(rdev);
7221 radeon_vm_manager_fini(rdev);
7222 radeon_ib_pool_fini(rdev);
7223 radeon_irq_kms_fini(rdev);
7224 uvd_v1_0_fini(rdev);
7225 radeon_uvd_fini(rdev);
7226 cik_pcie_gart_fini(rdev);
7227 r600_vram_scratch_fini(rdev);
7228 radeon_gem_fini(rdev);
7229 radeon_fence_driver_fini(rdev);
7230 radeon_bo_fini(rdev);
7231 radeon_atombios_fini(rdev);
7236 /* display watermark setup */
7238 * dce8_line_buffer_adjust - Set up the line buffer
7240 * @rdev: radeon_device pointer
7241 * @radeon_crtc: the selected display controller
7242 * @mode: the current display mode on the selected display
7245 * Set up the line buffer allocation for
7246 * the selected display controller (CIK).
7247 * Returns the line buffer size in pixels.
7249 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7250 struct radeon_crtc *radeon_crtc,
7251 struct drm_display_mode *mode)
7257 * There are 6 line buffers, one for each display controller.
7258 * There are 3 partitions per LB. Select the number of partitions
7259 * to enable based on the display width. For display widths larger
7260 * than 4096, you need to use 2 display controllers and combine
7261 * them using the stereo blender.
7263 if (radeon_crtc->base.enabled && mode) {
7264 if (mode->crtc_hdisplay < 1920)
7266 else if (mode->crtc_hdisplay < 2560)
7268 else if (mode->crtc_hdisplay < 4096)
7271 DRM_DEBUG_KMS("Mode too big for LB!\n");
7277 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7278 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7280 if (radeon_crtc->base.enabled && mode) {
7292 /* controller not enabled, so no lb used */
7297 * cik_get_number_of_dram_channels - get the number of dram channels
7299 * @rdev: radeon_device pointer
7301 * Look up the number of video ram channels (CIK).
7302 * Used for display watermark bandwidth calculations
7303 * Returns the number of dram channels
7305 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7307 u32 tmp = RREG32(MC_SHARED_CHMAP);
7309 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7332 struct dce8_wm_params {
7333 u32 dram_channels; /* number of dram channels */
7334 u32 yclk; /* bandwidth per dram data pin in kHz */
7335 u32 sclk; /* engine clock in kHz */
7336 u32 disp_clk; /* display clock in kHz */
7337 u32 src_width; /* viewport width */
7338 u32 active_time; /* active display time in ns */
7339 u32 blank_time; /* blank time in ns */
7340 bool interlaced; /* mode is interlaced */
7341 fixed20_12 vsc; /* vertical scale ratio */
7342 u32 num_heads; /* number of active crtcs */
7343 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7344 u32 lb_size; /* line buffer allocated to pipe */
7345 u32 vtaps; /* vertical scaler taps */
7349 * dce8_dram_bandwidth - get the dram bandwidth
7351 * @wm: watermark calculation data
7353 * Calculate the raw dram bandwidth (CIK).
7354 * Used for display watermark bandwidth calculations
7355 * Returns the dram bandwidth in MBytes/s
7357 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7359 /* Calculate raw DRAM Bandwidth */
7360 fixed20_12 dram_efficiency; /* 0.7 */
7361 fixed20_12 yclk, dram_channels, bandwidth;
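/* all of the math below is 20.12 fixed point: yclk (kHz) is scaled down to
 * MHz, each channel is treated as a 4-byte-wide data path, and 7/10 models
 * the assumed DRAM efficiency, giving a result in MBytes/s.
 */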
7364 a.full = dfixed_const(1000);
7365 yclk.full = dfixed_const(wm->yclk);
7366 yclk.full = dfixed_div(yclk, a);
7367 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7368 a.full = dfixed_const(10);
7369 dram_efficiency.full = dfixed_const(7);
7370 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7371 bandwidth.full = dfixed_mul(dram_channels, yclk);
7372 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7374 return dfixed_trunc(bandwidth);
7378 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7380 * @wm: watermark calculation data
7382 * Calculate the dram bandwidth used for display (CIK).
7383 * Used for display watermark bandwidth calculations
7384 * Returns the dram bandwidth for display in MBytes/s
7386 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7388 /* Calculate DRAM Bandwidth and the part allocated to display. */
7389 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7390 fixed20_12 yclk, dram_channels, bandwidth;
7393 a.full = dfixed_const(1000);
7394 yclk.full = dfixed_const(wm->yclk);
7395 yclk.full = dfixed_div(yclk, a);
7396 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7397 a.full = dfixed_const(10);
7398 disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
7399 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7400 bandwidth.full = dfixed_mul(dram_channels, yclk);
7401 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7403 return dfixed_trunc(bandwidth);
7407 * dce8_data_return_bandwidth - get the data return bandwidth
7409 * @wm: watermark calculation data
7411 * Calculate the data return bandwidth used for display (CIK).
7412 * Used for display watermark bandwidth calculations
7413 * Returns the data return bandwidth in MBytes/s
7415 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7417 /* Calculate the display Data return Bandwidth */
7418 fixed20_12 return_efficiency; /* 0.8 */
7419 fixed20_12 sclk, bandwidth;
7422 a.full = dfixed_const(1000);
7423 sclk.full = dfixed_const(wm->sclk);
7424 sclk.full = dfixed_div(sclk, a);
7425 a.full = dfixed_const(10);
7426 return_efficiency.full = dfixed_const(8);
7427 return_efficiency.full = dfixed_div(return_efficiency, a);
7428 a.full = dfixed_const(32);
7429 bandwidth.full = dfixed_mul(a, sclk);
7430 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7432 return dfixed_trunc(bandwidth);
7436 * dce8_dmif_request_bandwidth - get the dmif bandwidth
7438 * @wm: watermark calculation data
7440 * Calculate the dmif bandwidth used for display (CIK).
7441 * Used for display watermark bandwidth calculations
7442 * Returns the dmif bandwidth in MBytes/s
7444 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7446 /* Calculate the DMIF Request Bandwidth */
7447 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7448 fixed20_12 disp_clk, bandwidth;
7451 a.full = dfixed_const(1000);
7452 disp_clk.full = dfixed_const(wm->disp_clk);
7453 disp_clk.full = dfixed_div(disp_clk, a);
7454 a.full = dfixed_const(32);
7455 b.full = dfixed_mul(a, disp_clk);
7457 a.full = dfixed_const(10);
7458 disp_clk_request_efficiency.full = dfixed_const(8);
7459 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7461 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7463 return dfixed_trunc(bandwidth);
7467 * dce8_available_bandwidth - get the min available bandwidth
7469 * @wm: watermark calculation data
7471 * Calculate the min available bandwidth used for display (CIK).
7472 * Used for display watermark bandwidth calculations
7473 * Returns the min available bandwidth in MBytes/s
7475 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7477 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
7478 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7479 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7480 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7482 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7486 * dce8_average_bandwidth - get the average available bandwidth
7488 * @wm: watermark calculation data
7490 * Calculate the average available bandwidth used for display (CIK).
7491 * Used for display watermark bandwidth calculations
7492 * Returns the average available bandwidth in MBytes/s
7494 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7496 /* Calculate the display mode Average Bandwidth
7497 * DisplayMode should contain the source and destination dimensions,
7501 fixed20_12 line_time;
7502 fixed20_12 src_width;
7503 fixed20_12 bandwidth;
7506 a.full = dfixed_const(1000);
7507 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7508 line_time.full = dfixed_div(line_time, a);
7509 bpp.full = dfixed_const(wm->bytes_per_pixel);
7510 src_width.full = dfixed_const(wm->src_width);
7511 bandwidth.full = dfixed_mul(src_width, bpp);
7512 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7513 bandwidth.full = dfixed_div(bandwidth, line_time);
7515 return dfixed_trunc(bandwidth);
7519 * dce8_latency_watermark - get the latency watermark
7521 * @wm: watermark calculation data
7523 * Calculate the latency watermark (CIK).
7524 * Used for display watermark bandwidth calculations
7525 * Returns the latency watermark in ns
7527 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7529 /* First calculate the latency in ns */
7530 u32 mc_latency = 2000; /* 2000 ns. */
7531 u32 available_bandwidth = dce8_available_bandwidth(wm);
7532 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7533 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7534 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7535 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7536 (wm->num_heads * cursor_line_pair_return_time);
7537 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7538 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7539 u32 tmp, dmif_size = 12288;
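/* the watermark is the worst-case time (in ns) the display may wait for
 * data: raw memory latency plus the time other heads spend returning their
 * chunks, extended when the line buffer cannot be refilled within one
 * active line.
 */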
7542 if (wm->num_heads == 0)
7545 a.full = dfixed_const(2);
7546 b.full = dfixed_const(1);
7547 if ((wm->vsc.full > a.full) ||
7548 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7550 ((wm->vsc.full >= a.full) && wm->interlaced))
7551 max_src_lines_per_dst_line = 4;
7553 max_src_lines_per_dst_line = 2;
7555 a.full = dfixed_const(available_bandwidth);
7556 b.full = dfixed_const(wm->num_heads);
7557 a.full = dfixed_div(a, b);
7559 b.full = dfixed_const(mc_latency + 512);
7560 c.full = dfixed_const(wm->disp_clk);
7561 b.full = dfixed_div(b, c);
7563 c.full = dfixed_const(dmif_size);
7564 b.full = dfixed_div(c, b);
7566 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7568 b.full = dfixed_const(1000);
7569 c.full = dfixed_const(wm->disp_clk);
7570 b.full = dfixed_div(c, b);
7571 c.full = dfixed_const(wm->bytes_per_pixel);
7572 b.full = dfixed_mul(b, c);
7574 lb_fill_bw = min(tmp, dfixed_trunc(b));
7576 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7577 b.full = dfixed_const(1000);
7578 c.full = dfixed_const(lb_fill_bw);
7579 b.full = dfixed_div(c, b);
7580 a.full = dfixed_div(a, b);
7581 line_fill_time = dfixed_trunc(a);
7583 if (line_fill_time < wm->active_time)
7586 return latency + (line_fill_time - wm->active_time);
7591 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7592 * average and available dram bandwidth
7594 * @wm: watermark calculation data
7596 * Check if the display average bandwidth fits in the display
7597 * dram bandwidth (CIK).
7598 * Used for display watermark bandwidth calculations
7599 * Returns true if the display fits, false if not.
7601 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7603 if (dce8_average_bandwidth(wm) <=
7604 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7611 * dce8_average_bandwidth_vs_available_bandwidth - check
7612 * average and available bandwidth
7614 * @wm: watermark calculation data
7616 * Check if the display average bandwidth fits in the display
7617 * available bandwidth (CIK).
7618 * Used for display watermark bandwidth calculations
7619 * Returns true if the display fits, false if not.
7621 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7623 if (dce8_average_bandwidth(wm) <=
7624 (dce8_available_bandwidth(wm) / wm->num_heads))
7631 * dce8_check_latency_hiding - check latency hiding
7633 * @wm: watermark calculation data
7635 * Check latency hiding (CIK).
7636 * Used for display watermark bandwidth calculations
7637 * Returns true if the display fits, false if not.
7639 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7641 u32 lb_partitions = wm->lb_size / wm->src_width;
7642 u32 line_time = wm->active_time + wm->blank_time;
7643 u32 latency_tolerant_lines;
7647 a.full = dfixed_const(1);
7648 if (wm->vsc.full > a.full)
7649 latency_tolerant_lines = 1;
7651 if (lb_partitions <= (wm->vtaps + 1))
7652 latency_tolerant_lines = 1;
7654 latency_tolerant_lines = 2;
7657 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
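/* the fetch latency must be hidden within the lines the LB can buffer ahead
 * plus the blank interval; otherwise this mode cannot tolerate the computed
 * latency watermark.
 */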
7659 if (dce8_latency_watermark(wm) <= latency_hiding)
7666 * dce8_program_watermarks - program display watermarks
7668 * @rdev: radeon_device pointer
7669 * @radeon_crtc: the selected display controller
7670 * @lb_size: line buffer size
7671 * @num_heads: number of display controllers in use
7673 * Calculate and program the display watermarks for the
7674 * selected display controller (CIK).
7676 static void dce8_program_watermarks(struct radeon_device *rdev,
7677 struct radeon_crtc *radeon_crtc,
7678 u32 lb_size, u32 num_heads)
7680 struct drm_display_mode *mode = &radeon_crtc->base.mode;
7681 struct dce8_wm_params wm_low, wm_high;
7684 u32 latency_watermark_a = 0, latency_watermark_b = 0;
7687 if (radeon_crtc->base.enabled && num_heads && mode) {
7688 pixel_period = 1000000 / (u32)mode->clock;
7689 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
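/* mode->clock is in kHz, so 1000000/clock gives the pixel period in ns;
 * line_time is clamped to 65535 to fit the watermark register field.
 */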
7691 /* watermark for high clocks */
7692 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7693 rdev->pm.dpm_enabled) {
7695 radeon_dpm_get_mclk(rdev, false) * 10;
7697 radeon_dpm_get_sclk(rdev, false) * 10;
7699 wm_high.yclk = rdev->pm.current_mclk * 10;
7700 wm_high.sclk = rdev->pm.current_sclk * 10;
7703 wm_high.disp_clk = mode->clock;
7704 wm_high.src_width = mode->crtc_hdisplay;
7705 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7706 wm_high.blank_time = line_time - wm_high.active_time;
7707 wm_high.interlaced = false;
7708 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7709 wm_high.interlaced = true;
7710 wm_high.vsc = radeon_crtc->vsc;
7712 if (radeon_crtc->rmx_type != RMX_OFF)
7714 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7715 wm_high.lb_size = lb_size;
7716 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7717 wm_high.num_heads = num_heads;
7719 /* set for high clocks */
7720 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7722 /* possibly force display priority to high */
7723 /* should really do this at mode validation time... */
7724 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7725 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7726 !dce8_check_latency_hiding(&wm_high) ||
7727 (rdev->disp_priority == 2)) {
7728 DRM_DEBUG_KMS("force priority to high\n");
7731 /* watermark for low clocks */
7732 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7733 rdev->pm.dpm_enabled) {
7735 radeon_dpm_get_mclk(rdev, true) * 10;
7737 radeon_dpm_get_sclk(rdev, true) * 10;
7739 wm_low.yclk = rdev->pm.current_mclk * 10;
7740 wm_low.sclk = rdev->pm.current_sclk * 10;
7743 wm_low.disp_clk = mode->clock;
7744 wm_low.src_width = mode->crtc_hdisplay;
7745 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7746 wm_low.blank_time = line_time - wm_low.active_time;
7747 wm_low.interlaced = false;
7748 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7749 wm_low.interlaced = true;
7750 wm_low.vsc = radeon_crtc->vsc;
7752 if (radeon_crtc->rmx_type != RMX_OFF)
7754 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7755 wm_low.lb_size = lb_size;
7756 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7757 wm_low.num_heads = num_heads;
7759 /* set for low clocks */
7760 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7762 /* possibly force display priority to high */
7763 /* should really do this at mode validation time... */
7764 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7765 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7766 !dce8_check_latency_hiding(&wm_low) ||
7767 (rdev->disp_priority == 2)) {
7768 DRM_DEBUG_KMS("force priority to high\n");
7773 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7775 tmp &= ~LATENCY_WATERMARK_MASK(3);
7776 tmp |= LATENCY_WATERMARK_MASK(1);
7777 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7778 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7779 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7780 LATENCY_HIGH_WATERMARK(line_time)));
7782 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7783 tmp &= ~LATENCY_WATERMARK_MASK(3);
7784 tmp |= LATENCY_WATERMARK_MASK(2);
7785 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7786 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7787 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7788 LATENCY_HIGH_WATERMARK(line_time)));
7789 /* restore original selection */
7790 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7792 /* save values for DPM */
7793 radeon_crtc->line_time = line_time;
7794 radeon_crtc->wm_high = latency_watermark_a;
7795 radeon_crtc->wm_low = latency_watermark_b;
7799 * dce8_bandwidth_update - program display watermarks
7801 * @rdev: radeon_device pointer
7803 * Calculate and program the display watermarks and line
7804 * buffer allocation (CIK).
7806 void dce8_bandwidth_update(struct radeon_device *rdev)
7808 struct drm_display_mode *mode = NULL;
7809 u32 num_heads = 0, lb_size;
7812 radeon_update_display_priority(rdev);
7814 for (i = 0; i < rdev->num_crtc; i++) {
7815 if (rdev->mode_info.crtcs[i]->base.enabled)
7818 for (i = 0; i < rdev->num_crtc; i++) {
7819 mode = &rdev->mode_info.crtcs[i]->base.mode;
7820 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
7821 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
7826 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
7828 * @rdev: radeon_device pointer
7830 * Fetches a GPU clock counter snapshot (CIK).
7831 * Returns the 64 bit clock counter snapshot.
7833 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
7837 mutex_lock(&rdev->gpu_clock_mutex);
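/* writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running counter so
 * the LSB/MSB reads below return a coherent 64-bit snapshot; the mutex
 * serializes readers around the latch.
 */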
7838 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7839 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7840 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7841 mutex_unlock(&rdev->gpu_clock_mutex);
7845 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
7846 u32 cntl_reg, u32 status_reg)
7849 struct atom_clock_dividers dividers;
7852 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
7853 clock, false, &dividers);
7857 tmp = RREG32_SMC(cntl_reg);
7858 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
7859 tmp |= dividers.post_divider;
7860 WREG32_SMC(cntl_reg, tmp);
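/* after programming the new post divider, poll the status register (with a
 * short delay per iteration in the full loop) until the clock change is
 * reported as complete.
 */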
7862 for (i = 0; i < 100; i++) {
7863 if (RREG32_SMC(status_reg) & DCLK_STATUS)
7873 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7877 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
7881 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
7885 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
7887 struct pci_dev *root = rdev->pdev->bus->self;
7888 int bridge_pos, gpu_pos;
7889 u32 speed_cntl, mask, current_data_rate;
7893 if (radeon_pcie_gen2 == 0)
7896 if (rdev->flags & RADEON_IS_IGP)
7899 if (!(rdev->flags & RADEON_IS_PCIE))
7902 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7906 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7909 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7910 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7911 LC_CURRENT_DATA_RATE_SHIFT;
7912 if (mask & DRM_PCIE_SPEED_80) {
7913 if (current_data_rate == 2) {
7914 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7917 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7918 } else if (mask & DRM_PCIE_SPEED_50) {
7919 if (current_data_rate == 1) {
7920 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7923 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7926 bridge_pos = pci_pcie_cap(root);
7930 gpu_pos = pci_pcie_cap(rdev->pdev);
7934 if (mask & DRM_PCIE_SPEED_80) {
7935 /* re-try equalization if gen3 is not already enabled */
7936 if (current_data_rate != 2) {
7937 u16 bridge_cfg, gpu_cfg;
7938 u16 bridge_cfg2, gpu_cfg2;
7939 u32 max_lw, current_lw, tmp;
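/* gen3 retraining: save the bridge and GPU link control state, hold the
 * link quiesced (LC_SET_QUIESCE) while equalization is redone, then restore
 * the saved HAWD bits and target-speed fields below.
 */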
7941 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7942 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7944 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7945 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7947 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7948 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7950 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
7951 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7952 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
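/* if the link came up narrower than the detected maximum, ask the port to
 * renegotiate/upconfigure to full width before attempting the speed change.
 */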
7954 if (current_lw < max_lw) {
7955 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7956 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7957 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7958 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7959 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7960 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7964 for (i = 0; i < 10; i++) {
7966 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7967 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7970 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7971 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7973 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7974 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7976 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7977 tmp |= LC_SET_QUIESCE;
7978 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7980 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7982 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7987 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7988 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7989 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7990 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7992 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7993 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7994 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7995 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7998 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7999 tmp16 &= ~((1 << 4) | (7 << 9));
8000 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8001 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8003 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8004 tmp16 &= ~((1 << 4) | (7 << 9));
8005 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8006 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8008 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8009 tmp &= ~LC_SET_QUIESCE;
8010 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8015 /* set the link speed */
8016 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8017 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8018 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8020 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8022 if (mask & DRM_PCIE_SPEED_80)
8023 tmp16 |= 3; /* gen3 */
8024 else if (mask & DRM_PCIE_SPEED_50)
8025 tmp16 |= 2; /* gen2 */
8027 tmp16 |= 1; /* gen1 */
8028 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
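/* the target link speed field of PCI_EXP_LNKCTL2 is encoded as 1 = 2.5GT/s
 * (gen1), 2 = 5GT/s (gen2), 3 = 8GT/s (gen3), matching the values
 * programmed above.
 */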
8030 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8031 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8032 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8034 for (i = 0; i < rdev->usec_timeout; i++) {
8035 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8036 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8042 static void cik_program_aspm(struct radeon_device *rdev)
8045 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8046 bool disable_clkreq = false;
8048 if (radeon_aspm == 0)
8051 /* XXX double check IGPs */
8052 if (rdev->flags & RADEON_IS_IGP)
8055 if (!(rdev->flags & RADEON_IS_PCIE))
8058 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8059 data &= ~LC_XMIT_N_FTS_MASK;
8060 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8062 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8064 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8065 data |= LC_GO_TO_RECOVERY;
8067 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8069 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8070 data |= P_IGNORE_EDB_ERR;
8072 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8074 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8075 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8076 data |= LC_PMI_TO_L1_DIS;
8078 data |= LC_L0S_INACTIVITY(7);
8081 data |= LC_L1_INACTIVITY(7);
8082 data &= ~LC_PMI_TO_L1_DIS;
8084 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8086 if (!disable_plloff_in_l1) {
8087 bool clk_req_support;
8089 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8090 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8091 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8093 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8095 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8096 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8097 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8099 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8101 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8102 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8103 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8105 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8107 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8108 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8109 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8111 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8113 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8114 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8115 data |= LC_DYN_LANES_PWR_STATE(3);
8117 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8119 if (!disable_clkreq) {
8120 struct pci_dev *root = rdev->pdev->bus->self;
8123 clk_req_support = false;
8124 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8125 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8126 clk_req_support = true;
8128 clk_req_support = false;
8131 if (clk_req_support) {
8132 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8133 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8135 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8137 orig = data = RREG32_SMC(THM_CLK_CNTL);
8138 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8139 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8141 WREG32_SMC(THM_CLK_CNTL, data);
8143 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8144 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8145 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8147 WREG32_SMC(MISC_CLK_CTRL, data);
8149 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8150 data &= ~BCLK_AS_XCLK;
8152 WREG32_SMC(CG_CLKPIN_CNTL, data);
8154 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8155 data &= ~FORCE_BIF_REFCLK_EN;
8157 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8159 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8160 data &= ~MPLL_CLKOUT_SEL_MASK;
8161 data |= MPLL_CLKOUT_SEL(4);
8163 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8168 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8171 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8172 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8174 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8177 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8178 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8179 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8180 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8181 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8182 data &= ~LC_L0S_INACTIVITY_MASK;
8184 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);