/*
 * drm/radeon: update cik_tiling_mode_table_init() for hawaii
 * (blame view of drivers/gpu/drm/radeon/cik.c, linux-2.6-block.git)
 */
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
8cc1a532
AD
25#include <linux/slab.h>
26#include <linux/module.h>
27#include "drmP.h"
28#include "radeon.h"
6f2043ce 29#include "radeon_asic.h"
8cc1a532
AD
30#include "cikd.h"
31#include "atom.h"
841cf442 32#include "cik_blit_shaders.h"
8c68e393 33#include "radeon_ucode.h"
22c775ce 34#include "clearstate_ci.h"
02c81327
AD
35
36MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
21a93e13 42MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
cc8dbbb4 43MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
02c81327
AD
44MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
21a93e13 49MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
02c81327
AD
50MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51MODULE_FIRMWARE("radeon/KABINI_me.bin");
52MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
21a93e13 55MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
02c81327 56
a59781bb
AD
57extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58extern void r600_ih_ring_fini(struct radeon_device *rdev);
6f2043ce
AD
59extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
cc066715 61extern bool evergreen_is_display_hung(struct radeon_device *rdev);
1fd11777
AD
62extern void sumo_rlc_fini(struct radeon_device *rdev);
63extern int sumo_rlc_init(struct radeon_device *rdev);
1c49165d 64extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
866d83de 65extern void si_rlc_reset(struct radeon_device *rdev);
22c775ce 66extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
2483b4ea
CK
67extern int cik_sdma_resume(struct radeon_device *rdev);
68extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69extern void cik_sdma_fini(struct radeon_device *rdev);
cc066715 70static void cik_rlc_stop(struct radeon_device *rdev);
8a7cd276 71static void cik_pcie_gen3_enable(struct radeon_device *rdev);
7235711a 72static void cik_program_aspm(struct radeon_device *rdev);
22c775ce
AD
73static void cik_init_pg(struct radeon_device *rdev);
74static void cik_init_cg(struct radeon_device *rdev);
fb2c7f4d
AD
75static void cik_fini_pg(struct radeon_device *rdev);
76static void cik_fini_cg(struct radeon_device *rdev);
4214faf6
AD
77static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
78 bool enable);
6f2043ce 79
286d9cc6
AD
80/* get temperature in millidegrees */
81int ci_get_temp(struct radeon_device *rdev)
82{
83 u32 temp;
84 int actual_temp = 0;
85
86 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
87 CTF_TEMP_SHIFT;
88
89 if (temp & 0x200)
90 actual_temp = 255;
91 else
92 actual_temp = temp & 0x1ff;
93
94 actual_temp = actual_temp * 1000;
95
96 return actual_temp;
97}
98
99/* get temperature in millidegrees */
100int kv_get_temp(struct radeon_device *rdev)
101{
102 u32 temp;
103 int actual_temp = 0;
104
105 temp = RREG32_SMC(0xC0300E0C);
106
107 if (temp)
108 actual_temp = (temp / 8) - 49;
109 else
110 actual_temp = 0;
111
112 actual_temp = actual_temp * 1000;
113
114 return actual_temp;
115}
6f2043ce 116
6e2c3c0a
AD
117/*
118 * Indirect registers accessor
119 */
120u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
121{
0a5b7b0b 122 unsigned long flags;
6e2c3c0a
AD
123 u32 r;
124
0a5b7b0b 125 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
6e2c3c0a
AD
126 WREG32(PCIE_INDEX, reg);
127 (void)RREG32(PCIE_INDEX);
128 r = RREG32(PCIE_DATA);
0a5b7b0b 129 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
6e2c3c0a
AD
130 return r;
131}
132
133void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
134{
0a5b7b0b
AD
135 unsigned long flags;
136
137 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
6e2c3c0a
AD
138 WREG32(PCIE_INDEX, reg);
139 (void)RREG32(PCIE_INDEX);
140 WREG32(PCIE_DATA, v);
141 (void)RREG32(PCIE_DATA);
0a5b7b0b 142 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
6e2c3c0a
AD
143}
144
22c775ce
AD
145static const u32 spectre_rlc_save_restore_register_list[] =
146{
147 (0x0e00 << 16) | (0xc12c >> 2),
148 0x00000000,
149 (0x0e00 << 16) | (0xc140 >> 2),
150 0x00000000,
151 (0x0e00 << 16) | (0xc150 >> 2),
152 0x00000000,
153 (0x0e00 << 16) | (0xc15c >> 2),
154 0x00000000,
155 (0x0e00 << 16) | (0xc168 >> 2),
156 0x00000000,
157 (0x0e00 << 16) | (0xc170 >> 2),
158 0x00000000,
159 (0x0e00 << 16) | (0xc178 >> 2),
160 0x00000000,
161 (0x0e00 << 16) | (0xc204 >> 2),
162 0x00000000,
163 (0x0e00 << 16) | (0xc2b4 >> 2),
164 0x00000000,
165 (0x0e00 << 16) | (0xc2b8 >> 2),
166 0x00000000,
167 (0x0e00 << 16) | (0xc2bc >> 2),
168 0x00000000,
169 (0x0e00 << 16) | (0xc2c0 >> 2),
170 0x00000000,
171 (0x0e00 << 16) | (0x8228 >> 2),
172 0x00000000,
173 (0x0e00 << 16) | (0x829c >> 2),
174 0x00000000,
175 (0x0e00 << 16) | (0x869c >> 2),
176 0x00000000,
177 (0x0600 << 16) | (0x98f4 >> 2),
178 0x00000000,
179 (0x0e00 << 16) | (0x98f8 >> 2),
180 0x00000000,
181 (0x0e00 << 16) | (0x9900 >> 2),
182 0x00000000,
183 (0x0e00 << 16) | (0xc260 >> 2),
184 0x00000000,
185 (0x0e00 << 16) | (0x90e8 >> 2),
186 0x00000000,
187 (0x0e00 << 16) | (0x3c000 >> 2),
188 0x00000000,
189 (0x0e00 << 16) | (0x3c00c >> 2),
190 0x00000000,
191 (0x0e00 << 16) | (0x8c1c >> 2),
192 0x00000000,
193 (0x0e00 << 16) | (0x9700 >> 2),
194 0x00000000,
195 (0x0e00 << 16) | (0xcd20 >> 2),
196 0x00000000,
197 (0x4e00 << 16) | (0xcd20 >> 2),
198 0x00000000,
199 (0x5e00 << 16) | (0xcd20 >> 2),
200 0x00000000,
201 (0x6e00 << 16) | (0xcd20 >> 2),
202 0x00000000,
203 (0x7e00 << 16) | (0xcd20 >> 2),
204 0x00000000,
205 (0x8e00 << 16) | (0xcd20 >> 2),
206 0x00000000,
207 (0x9e00 << 16) | (0xcd20 >> 2),
208 0x00000000,
209 (0xae00 << 16) | (0xcd20 >> 2),
210 0x00000000,
211 (0xbe00 << 16) | (0xcd20 >> 2),
212 0x00000000,
213 (0x0e00 << 16) | (0x89bc >> 2),
214 0x00000000,
215 (0x0e00 << 16) | (0x8900 >> 2),
216 0x00000000,
217 0x3,
218 (0x0e00 << 16) | (0xc130 >> 2),
219 0x00000000,
220 (0x0e00 << 16) | (0xc134 >> 2),
221 0x00000000,
222 (0x0e00 << 16) | (0xc1fc >> 2),
223 0x00000000,
224 (0x0e00 << 16) | (0xc208 >> 2),
225 0x00000000,
226 (0x0e00 << 16) | (0xc264 >> 2),
227 0x00000000,
228 (0x0e00 << 16) | (0xc268 >> 2),
229 0x00000000,
230 (0x0e00 << 16) | (0xc26c >> 2),
231 0x00000000,
232 (0x0e00 << 16) | (0xc270 >> 2),
233 0x00000000,
234 (0x0e00 << 16) | (0xc274 >> 2),
235 0x00000000,
236 (0x0e00 << 16) | (0xc278 >> 2),
237 0x00000000,
238 (0x0e00 << 16) | (0xc27c >> 2),
239 0x00000000,
240 (0x0e00 << 16) | (0xc280 >> 2),
241 0x00000000,
242 (0x0e00 << 16) | (0xc284 >> 2),
243 0x00000000,
244 (0x0e00 << 16) | (0xc288 >> 2),
245 0x00000000,
246 (0x0e00 << 16) | (0xc28c >> 2),
247 0x00000000,
248 (0x0e00 << 16) | (0xc290 >> 2),
249 0x00000000,
250 (0x0e00 << 16) | (0xc294 >> 2),
251 0x00000000,
252 (0x0e00 << 16) | (0xc298 >> 2),
253 0x00000000,
254 (0x0e00 << 16) | (0xc29c >> 2),
255 0x00000000,
256 (0x0e00 << 16) | (0xc2a0 >> 2),
257 0x00000000,
258 (0x0e00 << 16) | (0xc2a4 >> 2),
259 0x00000000,
260 (0x0e00 << 16) | (0xc2a8 >> 2),
261 0x00000000,
262 (0x0e00 << 16) | (0xc2ac >> 2),
263 0x00000000,
264 (0x0e00 << 16) | (0xc2b0 >> 2),
265 0x00000000,
266 (0x0e00 << 16) | (0x301d0 >> 2),
267 0x00000000,
268 (0x0e00 << 16) | (0x30238 >> 2),
269 0x00000000,
270 (0x0e00 << 16) | (0x30250 >> 2),
271 0x00000000,
272 (0x0e00 << 16) | (0x30254 >> 2),
273 0x00000000,
274 (0x0e00 << 16) | (0x30258 >> 2),
275 0x00000000,
276 (0x0e00 << 16) | (0x3025c >> 2),
277 0x00000000,
278 (0x4e00 << 16) | (0xc900 >> 2),
279 0x00000000,
280 (0x5e00 << 16) | (0xc900 >> 2),
281 0x00000000,
282 (0x6e00 << 16) | (0xc900 >> 2),
283 0x00000000,
284 (0x7e00 << 16) | (0xc900 >> 2),
285 0x00000000,
286 (0x8e00 << 16) | (0xc900 >> 2),
287 0x00000000,
288 (0x9e00 << 16) | (0xc900 >> 2),
289 0x00000000,
290 (0xae00 << 16) | (0xc900 >> 2),
291 0x00000000,
292 (0xbe00 << 16) | (0xc900 >> 2),
293 0x00000000,
294 (0x4e00 << 16) | (0xc904 >> 2),
295 0x00000000,
296 (0x5e00 << 16) | (0xc904 >> 2),
297 0x00000000,
298 (0x6e00 << 16) | (0xc904 >> 2),
299 0x00000000,
300 (0x7e00 << 16) | (0xc904 >> 2),
301 0x00000000,
302 (0x8e00 << 16) | (0xc904 >> 2),
303 0x00000000,
304 (0x9e00 << 16) | (0xc904 >> 2),
305 0x00000000,
306 (0xae00 << 16) | (0xc904 >> 2),
307 0x00000000,
308 (0xbe00 << 16) | (0xc904 >> 2),
309 0x00000000,
310 (0x4e00 << 16) | (0xc908 >> 2),
311 0x00000000,
312 (0x5e00 << 16) | (0xc908 >> 2),
313 0x00000000,
314 (0x6e00 << 16) | (0xc908 >> 2),
315 0x00000000,
316 (0x7e00 << 16) | (0xc908 >> 2),
317 0x00000000,
318 (0x8e00 << 16) | (0xc908 >> 2),
319 0x00000000,
320 (0x9e00 << 16) | (0xc908 >> 2),
321 0x00000000,
322 (0xae00 << 16) | (0xc908 >> 2),
323 0x00000000,
324 (0xbe00 << 16) | (0xc908 >> 2),
325 0x00000000,
326 (0x4e00 << 16) | (0xc90c >> 2),
327 0x00000000,
328 (0x5e00 << 16) | (0xc90c >> 2),
329 0x00000000,
330 (0x6e00 << 16) | (0xc90c >> 2),
331 0x00000000,
332 (0x7e00 << 16) | (0xc90c >> 2),
333 0x00000000,
334 (0x8e00 << 16) | (0xc90c >> 2),
335 0x00000000,
336 (0x9e00 << 16) | (0xc90c >> 2),
337 0x00000000,
338 (0xae00 << 16) | (0xc90c >> 2),
339 0x00000000,
340 (0xbe00 << 16) | (0xc90c >> 2),
341 0x00000000,
342 (0x4e00 << 16) | (0xc910 >> 2),
343 0x00000000,
344 (0x5e00 << 16) | (0xc910 >> 2),
345 0x00000000,
346 (0x6e00 << 16) | (0xc910 >> 2),
347 0x00000000,
348 (0x7e00 << 16) | (0xc910 >> 2),
349 0x00000000,
350 (0x8e00 << 16) | (0xc910 >> 2),
351 0x00000000,
352 (0x9e00 << 16) | (0xc910 >> 2),
353 0x00000000,
354 (0xae00 << 16) | (0xc910 >> 2),
355 0x00000000,
356 (0xbe00 << 16) | (0xc910 >> 2),
357 0x00000000,
358 (0x0e00 << 16) | (0xc99c >> 2),
359 0x00000000,
360 (0x0e00 << 16) | (0x9834 >> 2),
361 0x00000000,
362 (0x0000 << 16) | (0x30f00 >> 2),
363 0x00000000,
364 (0x0001 << 16) | (0x30f00 >> 2),
365 0x00000000,
366 (0x0000 << 16) | (0x30f04 >> 2),
367 0x00000000,
368 (0x0001 << 16) | (0x30f04 >> 2),
369 0x00000000,
370 (0x0000 << 16) | (0x30f08 >> 2),
371 0x00000000,
372 (0x0001 << 16) | (0x30f08 >> 2),
373 0x00000000,
374 (0x0000 << 16) | (0x30f0c >> 2),
375 0x00000000,
376 (0x0001 << 16) | (0x30f0c >> 2),
377 0x00000000,
378 (0x0600 << 16) | (0x9b7c >> 2),
379 0x00000000,
380 (0x0e00 << 16) | (0x8a14 >> 2),
381 0x00000000,
382 (0x0e00 << 16) | (0x8a18 >> 2),
383 0x00000000,
384 (0x0600 << 16) | (0x30a00 >> 2),
385 0x00000000,
386 (0x0e00 << 16) | (0x8bf0 >> 2),
387 0x00000000,
388 (0x0e00 << 16) | (0x8bcc >> 2),
389 0x00000000,
390 (0x0e00 << 16) | (0x8b24 >> 2),
391 0x00000000,
392 (0x0e00 << 16) | (0x30a04 >> 2),
393 0x00000000,
394 (0x0600 << 16) | (0x30a10 >> 2),
395 0x00000000,
396 (0x0600 << 16) | (0x30a14 >> 2),
397 0x00000000,
398 (0x0600 << 16) | (0x30a18 >> 2),
399 0x00000000,
400 (0x0600 << 16) | (0x30a2c >> 2),
401 0x00000000,
402 (0x0e00 << 16) | (0xc700 >> 2),
403 0x00000000,
404 (0x0e00 << 16) | (0xc704 >> 2),
405 0x00000000,
406 (0x0e00 << 16) | (0xc708 >> 2),
407 0x00000000,
408 (0x0e00 << 16) | (0xc768 >> 2),
409 0x00000000,
410 (0x0400 << 16) | (0xc770 >> 2),
411 0x00000000,
412 (0x0400 << 16) | (0xc774 >> 2),
413 0x00000000,
414 (0x0400 << 16) | (0xc778 >> 2),
415 0x00000000,
416 (0x0400 << 16) | (0xc77c >> 2),
417 0x00000000,
418 (0x0400 << 16) | (0xc780 >> 2),
419 0x00000000,
420 (0x0400 << 16) | (0xc784 >> 2),
421 0x00000000,
422 (0x0400 << 16) | (0xc788 >> 2),
423 0x00000000,
424 (0x0400 << 16) | (0xc78c >> 2),
425 0x00000000,
426 (0x0400 << 16) | (0xc798 >> 2),
427 0x00000000,
428 (0x0400 << 16) | (0xc79c >> 2),
429 0x00000000,
430 (0x0400 << 16) | (0xc7a0 >> 2),
431 0x00000000,
432 (0x0400 << 16) | (0xc7a4 >> 2),
433 0x00000000,
434 (0x0400 << 16) | (0xc7a8 >> 2),
435 0x00000000,
436 (0x0400 << 16) | (0xc7ac >> 2),
437 0x00000000,
438 (0x0400 << 16) | (0xc7b0 >> 2),
439 0x00000000,
440 (0x0400 << 16) | (0xc7b4 >> 2),
441 0x00000000,
442 (0x0e00 << 16) | (0x9100 >> 2),
443 0x00000000,
444 (0x0e00 << 16) | (0x3c010 >> 2),
445 0x00000000,
446 (0x0e00 << 16) | (0x92a8 >> 2),
447 0x00000000,
448 (0x0e00 << 16) | (0x92ac >> 2),
449 0x00000000,
450 (0x0e00 << 16) | (0x92b4 >> 2),
451 0x00000000,
452 (0x0e00 << 16) | (0x92b8 >> 2),
453 0x00000000,
454 (0x0e00 << 16) | (0x92bc >> 2),
455 0x00000000,
456 (0x0e00 << 16) | (0x92c0 >> 2),
457 0x00000000,
458 (0x0e00 << 16) | (0x92c4 >> 2),
459 0x00000000,
460 (0x0e00 << 16) | (0x92c8 >> 2),
461 0x00000000,
462 (0x0e00 << 16) | (0x92cc >> 2),
463 0x00000000,
464 (0x0e00 << 16) | (0x92d0 >> 2),
465 0x00000000,
466 (0x0e00 << 16) | (0x8c00 >> 2),
467 0x00000000,
468 (0x0e00 << 16) | (0x8c04 >> 2),
469 0x00000000,
470 (0x0e00 << 16) | (0x8c20 >> 2),
471 0x00000000,
472 (0x0e00 << 16) | (0x8c38 >> 2),
473 0x00000000,
474 (0x0e00 << 16) | (0x8c3c >> 2),
475 0x00000000,
476 (0x0e00 << 16) | (0xae00 >> 2),
477 0x00000000,
478 (0x0e00 << 16) | (0x9604 >> 2),
479 0x00000000,
480 (0x0e00 << 16) | (0xac08 >> 2),
481 0x00000000,
482 (0x0e00 << 16) | (0xac0c >> 2),
483 0x00000000,
484 (0x0e00 << 16) | (0xac10 >> 2),
485 0x00000000,
486 (0x0e00 << 16) | (0xac14 >> 2),
487 0x00000000,
488 (0x0e00 << 16) | (0xac58 >> 2),
489 0x00000000,
490 (0x0e00 << 16) | (0xac68 >> 2),
491 0x00000000,
492 (0x0e00 << 16) | (0xac6c >> 2),
493 0x00000000,
494 (0x0e00 << 16) | (0xac70 >> 2),
495 0x00000000,
496 (0x0e00 << 16) | (0xac74 >> 2),
497 0x00000000,
498 (0x0e00 << 16) | (0xac78 >> 2),
499 0x00000000,
500 (0x0e00 << 16) | (0xac7c >> 2),
501 0x00000000,
502 (0x0e00 << 16) | (0xac80 >> 2),
503 0x00000000,
504 (0x0e00 << 16) | (0xac84 >> 2),
505 0x00000000,
506 (0x0e00 << 16) | (0xac88 >> 2),
507 0x00000000,
508 (0x0e00 << 16) | (0xac8c >> 2),
509 0x00000000,
510 (0x0e00 << 16) | (0x970c >> 2),
511 0x00000000,
512 (0x0e00 << 16) | (0x9714 >> 2),
513 0x00000000,
514 (0x0e00 << 16) | (0x9718 >> 2),
515 0x00000000,
516 (0x0e00 << 16) | (0x971c >> 2),
517 0x00000000,
518 (0x0e00 << 16) | (0x31068 >> 2),
519 0x00000000,
520 (0x4e00 << 16) | (0x31068 >> 2),
521 0x00000000,
522 (0x5e00 << 16) | (0x31068 >> 2),
523 0x00000000,
524 (0x6e00 << 16) | (0x31068 >> 2),
525 0x00000000,
526 (0x7e00 << 16) | (0x31068 >> 2),
527 0x00000000,
528 (0x8e00 << 16) | (0x31068 >> 2),
529 0x00000000,
530 (0x9e00 << 16) | (0x31068 >> 2),
531 0x00000000,
532 (0xae00 << 16) | (0x31068 >> 2),
533 0x00000000,
534 (0xbe00 << 16) | (0x31068 >> 2),
535 0x00000000,
536 (0x0e00 << 16) | (0xcd10 >> 2),
537 0x00000000,
538 (0x0e00 << 16) | (0xcd14 >> 2),
539 0x00000000,
540 (0x0e00 << 16) | (0x88b0 >> 2),
541 0x00000000,
542 (0x0e00 << 16) | (0x88b4 >> 2),
543 0x00000000,
544 (0x0e00 << 16) | (0x88b8 >> 2),
545 0x00000000,
546 (0x0e00 << 16) | (0x88bc >> 2),
547 0x00000000,
548 (0x0400 << 16) | (0x89c0 >> 2),
549 0x00000000,
550 (0x0e00 << 16) | (0x88c4 >> 2),
551 0x00000000,
552 (0x0e00 << 16) | (0x88c8 >> 2),
553 0x00000000,
554 (0x0e00 << 16) | (0x88d0 >> 2),
555 0x00000000,
556 (0x0e00 << 16) | (0x88d4 >> 2),
557 0x00000000,
558 (0x0e00 << 16) | (0x88d8 >> 2),
559 0x00000000,
560 (0x0e00 << 16) | (0x8980 >> 2),
561 0x00000000,
562 (0x0e00 << 16) | (0x30938 >> 2),
563 0x00000000,
564 (0x0e00 << 16) | (0x3093c >> 2),
565 0x00000000,
566 (0x0e00 << 16) | (0x30940 >> 2),
567 0x00000000,
568 (0x0e00 << 16) | (0x89a0 >> 2),
569 0x00000000,
570 (0x0e00 << 16) | (0x30900 >> 2),
571 0x00000000,
572 (0x0e00 << 16) | (0x30904 >> 2),
573 0x00000000,
574 (0x0e00 << 16) | (0x89b4 >> 2),
575 0x00000000,
576 (0x0e00 << 16) | (0x3c210 >> 2),
577 0x00000000,
578 (0x0e00 << 16) | (0x3c214 >> 2),
579 0x00000000,
580 (0x0e00 << 16) | (0x3c218 >> 2),
581 0x00000000,
582 (0x0e00 << 16) | (0x8904 >> 2),
583 0x00000000,
584 0x5,
585 (0x0e00 << 16) | (0x8c28 >> 2),
586 (0x0e00 << 16) | (0x8c2c >> 2),
587 (0x0e00 << 16) | (0x8c30 >> 2),
588 (0x0e00 << 16) | (0x8c34 >> 2),
589 (0x0e00 << 16) | (0x9600 >> 2),
590};
591
592static const u32 kalindi_rlc_save_restore_register_list[] =
593{
594 (0x0e00 << 16) | (0xc12c >> 2),
595 0x00000000,
596 (0x0e00 << 16) | (0xc140 >> 2),
597 0x00000000,
598 (0x0e00 << 16) | (0xc150 >> 2),
599 0x00000000,
600 (0x0e00 << 16) | (0xc15c >> 2),
601 0x00000000,
602 (0x0e00 << 16) | (0xc168 >> 2),
603 0x00000000,
604 (0x0e00 << 16) | (0xc170 >> 2),
605 0x00000000,
606 (0x0e00 << 16) | (0xc204 >> 2),
607 0x00000000,
608 (0x0e00 << 16) | (0xc2b4 >> 2),
609 0x00000000,
610 (0x0e00 << 16) | (0xc2b8 >> 2),
611 0x00000000,
612 (0x0e00 << 16) | (0xc2bc >> 2),
613 0x00000000,
614 (0x0e00 << 16) | (0xc2c0 >> 2),
615 0x00000000,
616 (0x0e00 << 16) | (0x8228 >> 2),
617 0x00000000,
618 (0x0e00 << 16) | (0x829c >> 2),
619 0x00000000,
620 (0x0e00 << 16) | (0x869c >> 2),
621 0x00000000,
622 (0x0600 << 16) | (0x98f4 >> 2),
623 0x00000000,
624 (0x0e00 << 16) | (0x98f8 >> 2),
625 0x00000000,
626 (0x0e00 << 16) | (0x9900 >> 2),
627 0x00000000,
628 (0x0e00 << 16) | (0xc260 >> 2),
629 0x00000000,
630 (0x0e00 << 16) | (0x90e8 >> 2),
631 0x00000000,
632 (0x0e00 << 16) | (0x3c000 >> 2),
633 0x00000000,
634 (0x0e00 << 16) | (0x3c00c >> 2),
635 0x00000000,
636 (0x0e00 << 16) | (0x8c1c >> 2),
637 0x00000000,
638 (0x0e00 << 16) | (0x9700 >> 2),
639 0x00000000,
640 (0x0e00 << 16) | (0xcd20 >> 2),
641 0x00000000,
642 (0x4e00 << 16) | (0xcd20 >> 2),
643 0x00000000,
644 (0x5e00 << 16) | (0xcd20 >> 2),
645 0x00000000,
646 (0x6e00 << 16) | (0xcd20 >> 2),
647 0x00000000,
648 (0x7e00 << 16) | (0xcd20 >> 2),
649 0x00000000,
650 (0x0e00 << 16) | (0x89bc >> 2),
651 0x00000000,
652 (0x0e00 << 16) | (0x8900 >> 2),
653 0x00000000,
654 0x3,
655 (0x0e00 << 16) | (0xc130 >> 2),
656 0x00000000,
657 (0x0e00 << 16) | (0xc134 >> 2),
658 0x00000000,
659 (0x0e00 << 16) | (0xc1fc >> 2),
660 0x00000000,
661 (0x0e00 << 16) | (0xc208 >> 2),
662 0x00000000,
663 (0x0e00 << 16) | (0xc264 >> 2),
664 0x00000000,
665 (0x0e00 << 16) | (0xc268 >> 2),
666 0x00000000,
667 (0x0e00 << 16) | (0xc26c >> 2),
668 0x00000000,
669 (0x0e00 << 16) | (0xc270 >> 2),
670 0x00000000,
671 (0x0e00 << 16) | (0xc274 >> 2),
672 0x00000000,
673 (0x0e00 << 16) | (0xc28c >> 2),
674 0x00000000,
675 (0x0e00 << 16) | (0xc290 >> 2),
676 0x00000000,
677 (0x0e00 << 16) | (0xc294 >> 2),
678 0x00000000,
679 (0x0e00 << 16) | (0xc298 >> 2),
680 0x00000000,
681 (0x0e00 << 16) | (0xc2a0 >> 2),
682 0x00000000,
683 (0x0e00 << 16) | (0xc2a4 >> 2),
684 0x00000000,
685 (0x0e00 << 16) | (0xc2a8 >> 2),
686 0x00000000,
687 (0x0e00 << 16) | (0xc2ac >> 2),
688 0x00000000,
689 (0x0e00 << 16) | (0x301d0 >> 2),
690 0x00000000,
691 (0x0e00 << 16) | (0x30238 >> 2),
692 0x00000000,
693 (0x0e00 << 16) | (0x30250 >> 2),
694 0x00000000,
695 (0x0e00 << 16) | (0x30254 >> 2),
696 0x00000000,
697 (0x0e00 << 16) | (0x30258 >> 2),
698 0x00000000,
699 (0x0e00 << 16) | (0x3025c >> 2),
700 0x00000000,
701 (0x4e00 << 16) | (0xc900 >> 2),
702 0x00000000,
703 (0x5e00 << 16) | (0xc900 >> 2),
704 0x00000000,
705 (0x6e00 << 16) | (0xc900 >> 2),
706 0x00000000,
707 (0x7e00 << 16) | (0xc900 >> 2),
708 0x00000000,
709 (0x4e00 << 16) | (0xc904 >> 2),
710 0x00000000,
711 (0x5e00 << 16) | (0xc904 >> 2),
712 0x00000000,
713 (0x6e00 << 16) | (0xc904 >> 2),
714 0x00000000,
715 (0x7e00 << 16) | (0xc904 >> 2),
716 0x00000000,
717 (0x4e00 << 16) | (0xc908 >> 2),
718 0x00000000,
719 (0x5e00 << 16) | (0xc908 >> 2),
720 0x00000000,
721 (0x6e00 << 16) | (0xc908 >> 2),
722 0x00000000,
723 (0x7e00 << 16) | (0xc908 >> 2),
724 0x00000000,
725 (0x4e00 << 16) | (0xc90c >> 2),
726 0x00000000,
727 (0x5e00 << 16) | (0xc90c >> 2),
728 0x00000000,
729 (0x6e00 << 16) | (0xc90c >> 2),
730 0x00000000,
731 (0x7e00 << 16) | (0xc90c >> 2),
732 0x00000000,
733 (0x4e00 << 16) | (0xc910 >> 2),
734 0x00000000,
735 (0x5e00 << 16) | (0xc910 >> 2),
736 0x00000000,
737 (0x6e00 << 16) | (0xc910 >> 2),
738 0x00000000,
739 (0x7e00 << 16) | (0xc910 >> 2),
740 0x00000000,
741 (0x0e00 << 16) | (0xc99c >> 2),
742 0x00000000,
743 (0x0e00 << 16) | (0x9834 >> 2),
744 0x00000000,
745 (0x0000 << 16) | (0x30f00 >> 2),
746 0x00000000,
747 (0x0000 << 16) | (0x30f04 >> 2),
748 0x00000000,
749 (0x0000 << 16) | (0x30f08 >> 2),
750 0x00000000,
751 (0x0000 << 16) | (0x30f0c >> 2),
752 0x00000000,
753 (0x0600 << 16) | (0x9b7c >> 2),
754 0x00000000,
755 (0x0e00 << 16) | (0x8a14 >> 2),
756 0x00000000,
757 (0x0e00 << 16) | (0x8a18 >> 2),
758 0x00000000,
759 (0x0600 << 16) | (0x30a00 >> 2),
760 0x00000000,
761 (0x0e00 << 16) | (0x8bf0 >> 2),
762 0x00000000,
763 (0x0e00 << 16) | (0x8bcc >> 2),
764 0x00000000,
765 (0x0e00 << 16) | (0x8b24 >> 2),
766 0x00000000,
767 (0x0e00 << 16) | (0x30a04 >> 2),
768 0x00000000,
769 (0x0600 << 16) | (0x30a10 >> 2),
770 0x00000000,
771 (0x0600 << 16) | (0x30a14 >> 2),
772 0x00000000,
773 (0x0600 << 16) | (0x30a18 >> 2),
774 0x00000000,
775 (0x0600 << 16) | (0x30a2c >> 2),
776 0x00000000,
777 (0x0e00 << 16) | (0xc700 >> 2),
778 0x00000000,
779 (0x0e00 << 16) | (0xc704 >> 2),
780 0x00000000,
781 (0x0e00 << 16) | (0xc708 >> 2),
782 0x00000000,
783 (0x0e00 << 16) | (0xc768 >> 2),
784 0x00000000,
785 (0x0400 << 16) | (0xc770 >> 2),
786 0x00000000,
787 (0x0400 << 16) | (0xc774 >> 2),
788 0x00000000,
789 (0x0400 << 16) | (0xc798 >> 2),
790 0x00000000,
791 (0x0400 << 16) | (0xc79c >> 2),
792 0x00000000,
793 (0x0e00 << 16) | (0x9100 >> 2),
794 0x00000000,
795 (0x0e00 << 16) | (0x3c010 >> 2),
796 0x00000000,
797 (0x0e00 << 16) | (0x8c00 >> 2),
798 0x00000000,
799 (0x0e00 << 16) | (0x8c04 >> 2),
800 0x00000000,
801 (0x0e00 << 16) | (0x8c20 >> 2),
802 0x00000000,
803 (0x0e00 << 16) | (0x8c38 >> 2),
804 0x00000000,
805 (0x0e00 << 16) | (0x8c3c >> 2),
806 0x00000000,
807 (0x0e00 << 16) | (0xae00 >> 2),
808 0x00000000,
809 (0x0e00 << 16) | (0x9604 >> 2),
810 0x00000000,
811 (0x0e00 << 16) | (0xac08 >> 2),
812 0x00000000,
813 (0x0e00 << 16) | (0xac0c >> 2),
814 0x00000000,
815 (0x0e00 << 16) | (0xac10 >> 2),
816 0x00000000,
817 (0x0e00 << 16) | (0xac14 >> 2),
818 0x00000000,
819 (0x0e00 << 16) | (0xac58 >> 2),
820 0x00000000,
821 (0x0e00 << 16) | (0xac68 >> 2),
822 0x00000000,
823 (0x0e00 << 16) | (0xac6c >> 2),
824 0x00000000,
825 (0x0e00 << 16) | (0xac70 >> 2),
826 0x00000000,
827 (0x0e00 << 16) | (0xac74 >> 2),
828 0x00000000,
829 (0x0e00 << 16) | (0xac78 >> 2),
830 0x00000000,
831 (0x0e00 << 16) | (0xac7c >> 2),
832 0x00000000,
833 (0x0e00 << 16) | (0xac80 >> 2),
834 0x00000000,
835 (0x0e00 << 16) | (0xac84 >> 2),
836 0x00000000,
837 (0x0e00 << 16) | (0xac88 >> 2),
838 0x00000000,
839 (0x0e00 << 16) | (0xac8c >> 2),
840 0x00000000,
841 (0x0e00 << 16) | (0x970c >> 2),
842 0x00000000,
843 (0x0e00 << 16) | (0x9714 >> 2),
844 0x00000000,
845 (0x0e00 << 16) | (0x9718 >> 2),
846 0x00000000,
847 (0x0e00 << 16) | (0x971c >> 2),
848 0x00000000,
849 (0x0e00 << 16) | (0x31068 >> 2),
850 0x00000000,
851 (0x4e00 << 16) | (0x31068 >> 2),
852 0x00000000,
853 (0x5e00 << 16) | (0x31068 >> 2),
854 0x00000000,
855 (0x6e00 << 16) | (0x31068 >> 2),
856 0x00000000,
857 (0x7e00 << 16) | (0x31068 >> 2),
858 0x00000000,
859 (0x0e00 << 16) | (0xcd10 >> 2),
860 0x00000000,
861 (0x0e00 << 16) | (0xcd14 >> 2),
862 0x00000000,
863 (0x0e00 << 16) | (0x88b0 >> 2),
864 0x00000000,
865 (0x0e00 << 16) | (0x88b4 >> 2),
866 0x00000000,
867 (0x0e00 << 16) | (0x88b8 >> 2),
868 0x00000000,
869 (0x0e00 << 16) | (0x88bc >> 2),
870 0x00000000,
871 (0x0400 << 16) | (0x89c0 >> 2),
872 0x00000000,
873 (0x0e00 << 16) | (0x88c4 >> 2),
874 0x00000000,
875 (0x0e00 << 16) | (0x88c8 >> 2),
876 0x00000000,
877 (0x0e00 << 16) | (0x88d0 >> 2),
878 0x00000000,
879 (0x0e00 << 16) | (0x88d4 >> 2),
880 0x00000000,
881 (0x0e00 << 16) | (0x88d8 >> 2),
882 0x00000000,
883 (0x0e00 << 16) | (0x8980 >> 2),
884 0x00000000,
885 (0x0e00 << 16) | (0x30938 >> 2),
886 0x00000000,
887 (0x0e00 << 16) | (0x3093c >> 2),
888 0x00000000,
889 (0x0e00 << 16) | (0x30940 >> 2),
890 0x00000000,
891 (0x0e00 << 16) | (0x89a0 >> 2),
892 0x00000000,
893 (0x0e00 << 16) | (0x30900 >> 2),
894 0x00000000,
895 (0x0e00 << 16) | (0x30904 >> 2),
896 0x00000000,
897 (0x0e00 << 16) | (0x89b4 >> 2),
898 0x00000000,
899 (0x0e00 << 16) | (0x3e1fc >> 2),
900 0x00000000,
901 (0x0e00 << 16) | (0x3c210 >> 2),
902 0x00000000,
903 (0x0e00 << 16) | (0x3c214 >> 2),
904 0x00000000,
905 (0x0e00 << 16) | (0x3c218 >> 2),
906 0x00000000,
907 (0x0e00 << 16) | (0x8904 >> 2),
908 0x00000000,
909 0x5,
910 (0x0e00 << 16) | (0x8c28 >> 2),
911 (0x0e00 << 16) | (0x8c2c >> 2),
912 (0x0e00 << 16) | (0x8c30 >> 2),
913 (0x0e00 << 16) | (0x8c34 >> 2),
914 (0x0e00 << 16) | (0x9600 >> 2),
915};
916
0aafd313
AD
917static const u32 bonaire_golden_spm_registers[] =
918{
919 0x30800, 0xe0ffffff, 0xe0000000
920};
921
922static const u32 bonaire_golden_common_registers[] =
923{
924 0xc770, 0xffffffff, 0x00000800,
925 0xc774, 0xffffffff, 0x00000800,
926 0xc798, 0xffffffff, 0x00007fbf,
927 0xc79c, 0xffffffff, 0x00007faf
928};
929
930static const u32 bonaire_golden_registers[] =
931{
932 0x3354, 0x00000333, 0x00000333,
933 0x3350, 0x000c0fc0, 0x00040200,
934 0x9a10, 0x00010000, 0x00058208,
935 0x3c000, 0xffff1fff, 0x00140000,
936 0x3c200, 0xfdfc0fff, 0x00000100,
937 0x3c234, 0x40000000, 0x40000200,
938 0x9830, 0xffffffff, 0x00000000,
939 0x9834, 0xf00fffff, 0x00000400,
940 0x9838, 0x0002021c, 0x00020200,
941 0xc78, 0x00000080, 0x00000000,
942 0x5bb0, 0x000000f0, 0x00000070,
943 0x5bc0, 0xf0311fff, 0x80300000,
944 0x98f8, 0x73773777, 0x12010001,
945 0x350c, 0x00810000, 0x408af000,
946 0x7030, 0x31000111, 0x00000011,
947 0x2f48, 0x73773777, 0x12010001,
948 0x220c, 0x00007fb6, 0x0021a1b1,
949 0x2210, 0x00007fb6, 0x002021b1,
950 0x2180, 0x00007fb6, 0x00002191,
951 0x2218, 0x00007fb6, 0x002121b1,
952 0x221c, 0x00007fb6, 0x002021b1,
953 0x21dc, 0x00007fb6, 0x00002191,
954 0x21e0, 0x00007fb6, 0x00002191,
955 0x3628, 0x0000003f, 0x0000000a,
956 0x362c, 0x0000003f, 0x0000000a,
957 0x2ae4, 0x00073ffe, 0x000022a2,
958 0x240c, 0x000007ff, 0x00000000,
959 0x8a14, 0xf000003f, 0x00000007,
960 0x8bf0, 0x00002001, 0x00000001,
961 0x8b24, 0xffffffff, 0x00ffffff,
962 0x30a04, 0x0000ff0f, 0x00000000,
963 0x28a4c, 0x07ffffff, 0x06000000,
964 0x4d8, 0x00000fff, 0x00000100,
965 0x3e78, 0x00000001, 0x00000002,
966 0x9100, 0x03000000, 0x0362c688,
967 0x8c00, 0x000000ff, 0x00000001,
968 0xe40, 0x00001fff, 0x00001fff,
969 0x9060, 0x0000007f, 0x00000020,
970 0x9508, 0x00010000, 0x00010000,
971 0xac14, 0x000003ff, 0x000000f3,
972 0xac0c, 0xffffffff, 0x00001032
973};
974
975static const u32 bonaire_mgcg_cgcg_init[] =
976{
977 0xc420, 0xffffffff, 0xfffffffc,
978 0x30800, 0xffffffff, 0xe0000000,
979 0x3c2a0, 0xffffffff, 0x00000100,
980 0x3c208, 0xffffffff, 0x00000100,
981 0x3c2c0, 0xffffffff, 0xc0000100,
982 0x3c2c8, 0xffffffff, 0xc0000100,
983 0x3c2c4, 0xffffffff, 0xc0000100,
984 0x55e4, 0xffffffff, 0x00600100,
985 0x3c280, 0xffffffff, 0x00000100,
986 0x3c214, 0xffffffff, 0x06000100,
987 0x3c220, 0xffffffff, 0x00000100,
988 0x3c218, 0xffffffff, 0x06000100,
989 0x3c204, 0xffffffff, 0x00000100,
990 0x3c2e0, 0xffffffff, 0x00000100,
991 0x3c224, 0xffffffff, 0x00000100,
992 0x3c200, 0xffffffff, 0x00000100,
993 0x3c230, 0xffffffff, 0x00000100,
994 0x3c234, 0xffffffff, 0x00000100,
995 0x3c250, 0xffffffff, 0x00000100,
996 0x3c254, 0xffffffff, 0x00000100,
997 0x3c258, 0xffffffff, 0x00000100,
998 0x3c25c, 0xffffffff, 0x00000100,
999 0x3c260, 0xffffffff, 0x00000100,
1000 0x3c27c, 0xffffffff, 0x00000100,
1001 0x3c278, 0xffffffff, 0x00000100,
1002 0x3c210, 0xffffffff, 0x06000100,
1003 0x3c290, 0xffffffff, 0x00000100,
1004 0x3c274, 0xffffffff, 0x00000100,
1005 0x3c2b4, 0xffffffff, 0x00000100,
1006 0x3c2b0, 0xffffffff, 0x00000100,
1007 0x3c270, 0xffffffff, 0x00000100,
1008 0x30800, 0xffffffff, 0xe0000000,
1009 0x3c020, 0xffffffff, 0x00010000,
1010 0x3c024, 0xffffffff, 0x00030002,
1011 0x3c028, 0xffffffff, 0x00040007,
1012 0x3c02c, 0xffffffff, 0x00060005,
1013 0x3c030, 0xffffffff, 0x00090008,
1014 0x3c034, 0xffffffff, 0x00010000,
1015 0x3c038, 0xffffffff, 0x00030002,
1016 0x3c03c, 0xffffffff, 0x00040007,
1017 0x3c040, 0xffffffff, 0x00060005,
1018 0x3c044, 0xffffffff, 0x00090008,
1019 0x3c048, 0xffffffff, 0x00010000,
1020 0x3c04c, 0xffffffff, 0x00030002,
1021 0x3c050, 0xffffffff, 0x00040007,
1022 0x3c054, 0xffffffff, 0x00060005,
1023 0x3c058, 0xffffffff, 0x00090008,
1024 0x3c05c, 0xffffffff, 0x00010000,
1025 0x3c060, 0xffffffff, 0x00030002,
1026 0x3c064, 0xffffffff, 0x00040007,
1027 0x3c068, 0xffffffff, 0x00060005,
1028 0x3c06c, 0xffffffff, 0x00090008,
1029 0x3c070, 0xffffffff, 0x00010000,
1030 0x3c074, 0xffffffff, 0x00030002,
1031 0x3c078, 0xffffffff, 0x00040007,
1032 0x3c07c, 0xffffffff, 0x00060005,
1033 0x3c080, 0xffffffff, 0x00090008,
1034 0x3c084, 0xffffffff, 0x00010000,
1035 0x3c088, 0xffffffff, 0x00030002,
1036 0x3c08c, 0xffffffff, 0x00040007,
1037 0x3c090, 0xffffffff, 0x00060005,
1038 0x3c094, 0xffffffff, 0x00090008,
1039 0x3c098, 0xffffffff, 0x00010000,
1040 0x3c09c, 0xffffffff, 0x00030002,
1041 0x3c0a0, 0xffffffff, 0x00040007,
1042 0x3c0a4, 0xffffffff, 0x00060005,
1043 0x3c0a8, 0xffffffff, 0x00090008,
1044 0x3c000, 0xffffffff, 0x96e00200,
1045 0x8708, 0xffffffff, 0x00900100,
1046 0xc424, 0xffffffff, 0x0020003f,
1047 0x38, 0xffffffff, 0x0140001c,
1048 0x3c, 0x000f0000, 0x000f0000,
1049 0x220, 0xffffffff, 0xC060000C,
1050 0x224, 0xc0000fff, 0x00000100,
1051 0xf90, 0xffffffff, 0x00000100,
1052 0xf98, 0x00000101, 0x00000000,
1053 0x20a8, 0xffffffff, 0x00000104,
1054 0x55e4, 0xff000fff, 0x00000100,
1055 0x30cc, 0xc0000fff, 0x00000104,
1056 0xc1e4, 0x00000001, 0x00000001,
1057 0xd00c, 0xff000ff0, 0x00000100,
1058 0xd80c, 0xff000ff0, 0x00000100
1059};
1060
1061static const u32 spectre_golden_spm_registers[] =
1062{
1063 0x30800, 0xe0ffffff, 0xe0000000
1064};
1065
1066static const u32 spectre_golden_common_registers[] =
1067{
1068 0xc770, 0xffffffff, 0x00000800,
1069 0xc774, 0xffffffff, 0x00000800,
1070 0xc798, 0xffffffff, 0x00007fbf,
1071 0xc79c, 0xffffffff, 0x00007faf
1072};
1073
/* Spectre golden register fixups: { reg offset, and-mask, or-value } */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	/* NOTE(review): 0x28355 is not dword-aligned unlike every other entry
	 * (neighbour above is 0x28350) — verify this offset is intended. */
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1102
/* Spectre medium-grain / coarse-grain clock gating init sequence:
 * { reg offset, and-mask, or-value } triplets applied in order. */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0bc: repeating 5-entry pattern */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1193
/* Kalindi (Kabini GFX) SPM golden settings: { reg offset, and-mask, or-value } */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1198
/* Kalindi common golden settings: { reg offset, and-mask, or-value } */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1206
/* Kalindi golden register fixups: { reg offset, and-mask, or-value } */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1240
/* Kalindi medium-grain / coarse-grain clock gating init sequence:
 * { reg offset, and-mask, or-value } triplets applied in order. */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c044: repeating 5-entry pattern */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1299
1300static void cik_init_golden_registers(struct radeon_device *rdev)
1301{
1302 switch (rdev->family) {
1303 case CHIP_BONAIRE:
1304 radeon_program_register_sequence(rdev,
1305 bonaire_mgcg_cgcg_init,
1306 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1307 radeon_program_register_sequence(rdev,
1308 bonaire_golden_registers,
1309 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1310 radeon_program_register_sequence(rdev,
1311 bonaire_golden_common_registers,
1312 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1313 radeon_program_register_sequence(rdev,
1314 bonaire_golden_spm_registers,
1315 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1316 break;
1317 case CHIP_KABINI:
1318 radeon_program_register_sequence(rdev,
1319 kalindi_mgcg_cgcg_init,
1320 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1321 radeon_program_register_sequence(rdev,
1322 kalindi_golden_registers,
1323 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1324 radeon_program_register_sequence(rdev,
1325 kalindi_golden_common_registers,
1326 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1327 radeon_program_register_sequence(rdev,
1328 kalindi_golden_spm_registers,
1329 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1330 break;
1331 case CHIP_KAVERI:
1332 radeon_program_register_sequence(rdev,
1333 spectre_mgcg_cgcg_init,
1334 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1335 radeon_program_register_sequence(rdev,
1336 spectre_golden_registers,
1337 (const u32)ARRAY_SIZE(spectre_golden_registers));
1338 radeon_program_register_sequence(rdev,
1339 spectre_golden_common_registers,
1340 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1341 radeon_program_register_sequence(rdev,
1342 spectre_golden_spm_registers,
1343 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1344 break;
1345 default:
1346 break;
1347 }
1348}
1349
2c67912c
AD
1350/**
1351 * cik_get_xclk - get the xclk
1352 *
1353 * @rdev: radeon_device pointer
1354 *
1355 * Returns the reference clock used by the gfx engine
1356 * (CIK).
1357 */
1358u32 cik_get_xclk(struct radeon_device *rdev)
1359{
1360 u32 reference_clock = rdev->clock.spll.reference_freq;
1361
1362 if (rdev->flags & RADEON_IS_IGP) {
1363 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1364 return reference_clock / 2;
1365 } else {
1366 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1367 return reference_clock / 4;
1368 }
1369 return reference_clock;
1370}
1371
75efdee1
AD
1372/**
1373 * cik_mm_rdoorbell - read a doorbell dword
1374 *
1375 * @rdev: radeon_device pointer
1376 * @offset: byte offset into the aperture
1377 *
1378 * Returns the value in the doorbell aperture at the
1379 * requested offset (CIK).
1380 */
1381u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1382{
1383 if (offset < rdev->doorbell.size) {
1384 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1385 } else {
1386 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1387 return 0;
1388 }
1389}
1390
1391/**
1392 * cik_mm_wdoorbell - write a doorbell dword
1393 *
1394 * @rdev: radeon_device pointer
1395 * @offset: byte offset into the aperture
1396 * @v: value to write
1397 *
1398 * Writes @v to the doorbell aperture at the
1399 * requested offset (CIK).
1400 */
1401void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1402{
1403 if (offset < rdev->doorbell.size) {
1404 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1405 } else {
1406 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1407 }
1408}
1409
bc8273fe
AD
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io debug register init: { MC_SEQ_IO_DEBUG index, data } pairs,
 * programmed before uploading the MC ucode in ci_mc_load_microcode(). */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1451
b556b12e
AD
1452/**
1453 * cik_srbm_select - select specific register instances
1454 *
1455 * @rdev: radeon_device pointer
1456 * @me: selected ME (micro engine)
1457 * @pipe: pipe
1458 * @queue: queue
1459 * @vmid: VMID
1460 *
1461 * Switches the currently active registers instances. Some
1462 * registers are instanced per VMID, others are instanced per
1463 * me/pipe/queue combination.
1464 */
1465static void cik_srbm_select(struct radeon_device *rdev,
1466 u32 me, u32 pipe, u32 queue, u32 vmid)
1467{
1468 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1469 MEID(me & 0x3) |
1470 VMID(vmid & 0xf) |
1471 QUEUEID(queue & 0x7));
1472 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1473}
1474
bc8273fe
AD
1475/* ucode loading */
1476/**
1477 * ci_mc_load_microcode - load MC ucode into the hw
1478 *
1479 * @rdev: radeon_device pointer
1480 *
1481 * Load the GDDR MC ucode into the hw (CIK).
1482 * Returns 0 on success, error on failure.
1483 */
1484static int ci_mc_load_microcode(struct radeon_device *rdev)
1485{
1486 const __be32 *fw_data;
1487 u32 running, blackout = 0;
1488 u32 *io_mc_regs;
1489 int i, ucode_size, regs_size;
1490
1491 if (!rdev->mc_fw)
1492 return -EINVAL;
1493
1494 switch (rdev->family) {
1495 case CHIP_BONAIRE:
1496 default:
1497 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1498 ucode_size = CIK_MC_UCODE_SIZE;
1499 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1500 break;
1501 }
1502
1503 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1504
1505 if (running == 0) {
1506 if (running) {
1507 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1508 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1509 }
1510
1511 /* reset the engine and set to writable */
1512 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1513 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1514
1515 /* load mc io regs */
1516 for (i = 0; i < regs_size; i++) {
1517 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1518 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1519 }
1520 /* load the MC ucode */
1521 fw_data = (const __be32 *)rdev->mc_fw->data;
1522 for (i = 0; i < ucode_size; i++)
1523 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1524
1525 /* put the engine back into the active state */
1526 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1527 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1528 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1529
1530 /* wait for training to complete */
1531 for (i = 0; i < rdev->usec_timeout; i++) {
1532 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1533 break;
1534 udelay(1);
1535 }
1536 for (i = 0; i < rdev->usec_timeout; i++) {
1537 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1538 break;
1539 udelay(1);
1540 }
1541
1542 if (running)
1543 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1544 }
1545
1546 return 0;
1547}
1548
02c81327
AD
1549/**
1550 * cik_init_microcode - load ucode images from disk
1551 *
1552 * @rdev: radeon_device pointer
1553 *
1554 * Use the firmware interface to load the ucode images into
1555 * the driver (not loaded into hw).
1556 * Returns 0 on success, error on failure.
1557 */
1558static int cik_init_microcode(struct radeon_device *rdev)
1559{
02c81327
AD
1560 const char *chip_name;
1561 size_t pfp_req_size, me_req_size, ce_req_size,
21a93e13 1562 mec_req_size, rlc_req_size, mc_req_size,
cc8dbbb4 1563 sdma_req_size, smc_req_size;
02c81327
AD
1564 char fw_name[30];
1565 int err;
1566
1567 DRM_DEBUG("\n");
1568
02c81327
AD
1569 switch (rdev->family) {
1570 case CHIP_BONAIRE:
1571 chip_name = "BONAIRE";
1572 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1573 me_req_size = CIK_ME_UCODE_SIZE * 4;
1574 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1575 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1576 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1577 mc_req_size = CIK_MC_UCODE_SIZE * 4;
21a93e13 1578 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
cc8dbbb4 1579 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
02c81327
AD
1580 break;
1581 case CHIP_KAVERI:
1582 chip_name = "KAVERI";
1583 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1584 me_req_size = CIK_ME_UCODE_SIZE * 4;
1585 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1586 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1587 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
21a93e13 1588 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
02c81327
AD
1589 break;
1590 case CHIP_KABINI:
1591 chip_name = "KABINI";
1592 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1593 me_req_size = CIK_ME_UCODE_SIZE * 4;
1594 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1595 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1596 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
21a93e13 1597 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
02c81327
AD
1598 break;
1599 default: BUG();
1600 }
1601
1602 DRM_INFO("Loading %s Microcode\n", chip_name);
1603
1604 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
0a168933 1605 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
02c81327
AD
1606 if (err)
1607 goto out;
1608 if (rdev->pfp_fw->size != pfp_req_size) {
1609 printk(KERN_ERR
1610 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1611 rdev->pfp_fw->size, fw_name);
1612 err = -EINVAL;
1613 goto out;
1614 }
1615
1616 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
0a168933 1617 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
02c81327
AD
1618 if (err)
1619 goto out;
1620 if (rdev->me_fw->size != me_req_size) {
1621 printk(KERN_ERR
1622 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1623 rdev->me_fw->size, fw_name);
1624 err = -EINVAL;
1625 }
1626
1627 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
0a168933 1628 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
02c81327
AD
1629 if (err)
1630 goto out;
1631 if (rdev->ce_fw->size != ce_req_size) {
1632 printk(KERN_ERR
1633 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1634 rdev->ce_fw->size, fw_name);
1635 err = -EINVAL;
1636 }
1637
1638 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
0a168933 1639 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
02c81327
AD
1640 if (err)
1641 goto out;
1642 if (rdev->mec_fw->size != mec_req_size) {
1643 printk(KERN_ERR
1644 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1645 rdev->mec_fw->size, fw_name);
1646 err = -EINVAL;
1647 }
1648
1649 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
0a168933 1650 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
02c81327
AD
1651 if (err)
1652 goto out;
1653 if (rdev->rlc_fw->size != rlc_req_size) {
1654 printk(KERN_ERR
1655 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1656 rdev->rlc_fw->size, fw_name);
1657 err = -EINVAL;
1658 }
1659
21a93e13 1660 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
0a168933 1661 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
21a93e13
AD
1662 if (err)
1663 goto out;
1664 if (rdev->sdma_fw->size != sdma_req_size) {
1665 printk(KERN_ERR
1666 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1667 rdev->sdma_fw->size, fw_name);
1668 err = -EINVAL;
1669 }
1670
cc8dbbb4 1671 /* No SMC, MC ucode on APUs */
02c81327
AD
1672 if (!(rdev->flags & RADEON_IS_IGP)) {
1673 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
0a168933 1674 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
02c81327
AD
1675 if (err)
1676 goto out;
1677 if (rdev->mc_fw->size != mc_req_size) {
1678 printk(KERN_ERR
1679 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1680 rdev->mc_fw->size, fw_name);
1681 err = -EINVAL;
1682 }
cc8dbbb4
AD
1683
1684 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1685 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1686 if (err) {
1687 printk(KERN_ERR
1688 "smc: error loading firmware \"%s\"\n",
1689 fw_name);
1690 release_firmware(rdev->smc_fw);
1691 rdev->smc_fw = NULL;
d30d71e8 1692 err = 0;
cc8dbbb4
AD
1693 } else if (rdev->smc_fw->size != smc_req_size) {
1694 printk(KERN_ERR
1695 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1696 rdev->smc_fw->size, fw_name);
1697 err = -EINVAL;
1698 }
02c81327
AD
1699 }
1700
1701out:
02c81327
AD
1702 if (err) {
1703 if (err != -EINVAL)
1704 printk(KERN_ERR
1705 "cik_cp: Failed to load firmware \"%s\"\n",
1706 fw_name);
1707 release_firmware(rdev->pfp_fw);
1708 rdev->pfp_fw = NULL;
1709 release_firmware(rdev->me_fw);
1710 rdev->me_fw = NULL;
1711 release_firmware(rdev->ce_fw);
1712 rdev->ce_fw = NULL;
1713 release_firmware(rdev->rlc_fw);
1714 rdev->rlc_fw = NULL;
1715 release_firmware(rdev->mc_fw);
1716 rdev->mc_fw = NULL;
cc8dbbb4
AD
1717 release_firmware(rdev->smc_fw);
1718 rdev->smc_fw = NULL;
02c81327
AD
1719 }
1720 return err;
1721}
1722
8cc1a532
AD
1723/*
1724 * Core functions
1725 */
1726/**
1727 * cik_tiling_mode_table_init - init the hw tiling table
1728 *
1729 * @rdev: radeon_device pointer
1730 *
1731 * Starting with SI, the tiling setup is done globally in a
1732 * set of 32 tiling modes. Rather than selecting each set of
1733 * parameters per surface as on older asics, we just select
1734 * which index in the tiling table we want to use, and the
1735 * surface uses those parameters (CIK).
1736 */
1737static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1738{
1739 const u32 num_tile_mode_states = 32;
1740 const u32 num_secondary_tile_mode_states = 16;
1741 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1742 u32 num_pipe_configs;
1743 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1744 rdev->config.cik.max_shader_engines;
1745
1746 switch (rdev->config.cik.mem_row_size_in_kb) {
1747 case 1:
1748 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1749 break;
1750 case 2:
1751 default:
1752 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1753 break;
1754 case 4:
1755 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1756 break;
1757 }
1758
1759 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1760 if (num_pipe_configs > 8)
21e438af 1761 num_pipe_configs = 16;
8cc1a532 1762
21e438af
AD
1763 if (num_pipe_configs == 16) {
1764 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1765 switch (reg_offset) {
1766 case 0:
1767 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1768 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1769 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1770 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1771 break;
1772 case 1:
1773 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1775 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1776 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1777 break;
1778 case 2:
1779 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1780 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1781 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1782 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1783 break;
1784 case 3:
1785 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1786 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1787 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1788 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1789 break;
1790 case 4:
1791 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1792 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1793 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1794 TILE_SPLIT(split_equal_to_row_size));
1795 break;
1796 case 5:
1797 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1798 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1799 break;
1800 case 6:
1801 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1802 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1803 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1804 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1805 break;
1806 case 7:
1807 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1808 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1809 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1810 TILE_SPLIT(split_equal_to_row_size));
1811 break;
1812 case 8:
1813 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1814 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1815 break;
1816 case 9:
1817 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1818 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1819 break;
1820 case 10:
1821 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1822 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1823 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1824 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1825 break;
1826 case 11:
1827 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1828 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1829 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
1830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1831 break;
1832 case 12:
1833 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1834 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1835 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1837 break;
1838 case 13:
1839 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1840 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1841 break;
1842 case 14:
1843 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1844 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1845 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1847 break;
1848 case 16:
1849 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1850 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1851 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
1852 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1853 break;
1854 case 17:
1855 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1856 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1857 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1859 break;
1860 case 27:
1861 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1862 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1863 break;
1864 case 28:
1865 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1866 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1867 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1869 break;
1870 case 29:
1871 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1872 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1873 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
1874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1875 break;
1876 case 30:
1877 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1878 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1879 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1880 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1881 break;
1882 default:
1883 gb_tile_moden = 0;
1884 break;
1885 }
1886 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1887 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1888 }
1889 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1890 switch (reg_offset) {
1891 case 0:
1892 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1893 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1894 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1895 NUM_BANKS(ADDR_SURF_16_BANK));
1896 break;
1897 case 1:
1898 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1899 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1900 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1901 NUM_BANKS(ADDR_SURF_16_BANK));
1902 break;
1903 case 2:
1904 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1905 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1906 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1907 NUM_BANKS(ADDR_SURF_16_BANK));
1908 break;
1909 case 3:
1910 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1911 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1912 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1913 NUM_BANKS(ADDR_SURF_16_BANK));
1914 break;
1915 case 4:
1916 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1917 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1918 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1919 NUM_BANKS(ADDR_SURF_8_BANK));
1920 break;
1921 case 5:
1922 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1923 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1924 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1925 NUM_BANKS(ADDR_SURF_4_BANK));
1926 break;
1927 case 6:
1928 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1929 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1930 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1931 NUM_BANKS(ADDR_SURF_2_BANK));
1932 break;
1933 case 8:
1934 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1935 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1936 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1937 NUM_BANKS(ADDR_SURF_16_BANK));
1938 break;
1939 case 9:
1940 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1941 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1942 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1943 NUM_BANKS(ADDR_SURF_16_BANK));
1944 break;
1945 case 10:
1946 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1949 NUM_BANKS(ADDR_SURF_16_BANK));
1950 break;
1951 case 11:
1952 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1955 NUM_BANKS(ADDR_SURF_8_BANK));
1956 break;
1957 case 12:
1958 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1961 NUM_BANKS(ADDR_SURF_4_BANK));
1962 break;
1963 case 13:
1964 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1967 NUM_BANKS(ADDR_SURF_2_BANK));
1968 break;
1969 case 14:
1970 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1971 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1972 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1973 NUM_BANKS(ADDR_SURF_2_BANK));
1974 break;
1975 default:
1976 gb_tile_moden = 0;
1977 break;
1978 }
1979 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1980 }
1981 } else if (num_pipe_configs == 8) {
8cc1a532
AD
1982 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1983 switch (reg_offset) {
1984 case 0:
1985 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1986 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1987 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1989 break;
1990 case 1:
1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1994 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1995 break;
1996 case 2:
1997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2001 break;
2002 case 3:
2003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2006 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2007 break;
2008 case 4:
2009 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2012 TILE_SPLIT(split_equal_to_row_size));
2013 break;
2014 case 5:
2015 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2017 break;
2018 case 6:
2019 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2022 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2023 break;
2024 case 7:
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2028 TILE_SPLIT(split_equal_to_row_size));
2029 break;
2030 case 8:
2031 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2033 break;
2034 case 9:
2035 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2036 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2037 break;
2038 case 10:
2039 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2041 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2043 break;
2044 case 11:
2045 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2049 break;
2050 case 12:
2051 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2053 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2055 break;
2056 case 13:
2057 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2059 break;
2060 case 14:
2061 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2063 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2065 break;
2066 case 16:
2067 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071 break;
2072 case 17:
2073 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 break;
2078 case 27:
2079 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2080 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2081 break;
2082 case 28:
2083 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2084 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2085 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087 break;
2088 case 29:
2089 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 break;
2094 case 30:
2095 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2097 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099 break;
2100 default:
2101 gb_tile_moden = 0;
2102 break;
2103 }
39aee490 2104 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
8cc1a532
AD
2105 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2106 }
2107 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2108 switch (reg_offset) {
2109 case 0:
2110 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2111 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2112 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2113 NUM_BANKS(ADDR_SURF_16_BANK));
2114 break;
2115 case 1:
2116 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2117 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2118 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2119 NUM_BANKS(ADDR_SURF_16_BANK));
2120 break;
2121 case 2:
2122 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2125 NUM_BANKS(ADDR_SURF_16_BANK));
2126 break;
2127 case 3:
2128 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2131 NUM_BANKS(ADDR_SURF_16_BANK));
2132 break;
2133 case 4:
2134 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2137 NUM_BANKS(ADDR_SURF_8_BANK));
2138 break;
2139 case 5:
2140 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2143 NUM_BANKS(ADDR_SURF_4_BANK));
2144 break;
2145 case 6:
2146 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2149 NUM_BANKS(ADDR_SURF_2_BANK));
2150 break;
2151 case 8:
2152 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2155 NUM_BANKS(ADDR_SURF_16_BANK));
2156 break;
2157 case 9:
2158 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2161 NUM_BANKS(ADDR_SURF_16_BANK));
2162 break;
2163 case 10:
2164 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2167 NUM_BANKS(ADDR_SURF_16_BANK));
2168 break;
2169 case 11:
2170 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2173 NUM_BANKS(ADDR_SURF_16_BANK));
2174 break;
2175 case 12:
2176 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2179 NUM_BANKS(ADDR_SURF_8_BANK));
2180 break;
2181 case 13:
2182 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185 NUM_BANKS(ADDR_SURF_4_BANK));
2186 break;
2187 case 14:
2188 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2191 NUM_BANKS(ADDR_SURF_2_BANK));
2192 break;
2193 default:
2194 gb_tile_moden = 0;
2195 break;
2196 }
2197 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2198 }
2199 } else if (num_pipe_configs == 4) {
2200 if (num_rbs == 4) {
2201 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2202 switch (reg_offset) {
2203 case 0:
2204 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2206 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2207 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2208 break;
2209 case 1:
2210 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2212 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2214 break;
2215 case 2:
2216 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2218 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2219 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2220 break;
2221 case 3:
2222 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2224 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2226 break;
2227 case 4:
2228 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2230 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2231 TILE_SPLIT(split_equal_to_row_size));
2232 break;
2233 case 5:
2234 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 break;
2237 case 6:
2238 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2240 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2241 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2242 break;
2243 case 7:
2244 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2245 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2246 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2247 TILE_SPLIT(split_equal_to_row_size));
2248 break;
2249 case 8:
2250 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2251 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2252 break;
2253 case 9:
2254 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2256 break;
2257 case 10:
2258 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 break;
2263 case 11:
2264 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2265 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2266 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2268 break;
2269 case 12:
2270 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2272 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274 break;
2275 case 13:
2276 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2278 break;
2279 case 14:
2280 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2281 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2282 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2284 break;
2285 case 16:
2286 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2287 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2288 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290 break;
2291 case 17:
2292 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2293 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2294 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2296 break;
2297 case 27:
2298 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2299 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2300 break;
2301 case 28:
2302 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2303 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2304 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306 break;
2307 case 29:
2308 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2309 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2310 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2312 break;
2313 case 30:
2314 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2315 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2316 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318 break;
2319 default:
2320 gb_tile_moden = 0;
2321 break;
2322 }
39aee490 2323 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
8cc1a532
AD
2324 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2325 }
2326 } else if (num_rbs < 4) {
2327 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2328 switch (reg_offset) {
2329 case 0:
2330 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2332 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2334 break;
2335 case 1:
2336 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2340 break;
2341 case 2:
2342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2346 break;
2347 case 3:
2348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2351 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2352 break;
2353 case 4:
2354 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2356 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2357 TILE_SPLIT(split_equal_to_row_size));
2358 break;
2359 case 5:
2360 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2362 break;
2363 case 6:
2364 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2365 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2366 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2367 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2368 break;
2369 case 7:
2370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2373 TILE_SPLIT(split_equal_to_row_size));
2374 break;
2375 case 8:
2376 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2377 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2378 break;
2379 case 9:
2380 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2382 break;
2383 case 10:
2384 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2386 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 break;
2389 case 11:
2390 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2392 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2394 break;
2395 case 12:
2396 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2397 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2398 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400 break;
2401 case 13:
2402 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2404 break;
2405 case 14:
2406 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 break;
2411 case 16:
2412 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 break;
2417 case 17:
2418 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2420 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422 break;
2423 case 27:
2424 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2426 break;
2427 case 28:
2428 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 break;
2433 case 29:
2434 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 break;
2439 case 30:
2440 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 break;
2445 default:
2446 gb_tile_moden = 0;
2447 break;
2448 }
39aee490 2449 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
8cc1a532
AD
2450 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2451 }
2452 }
2453 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2454 switch (reg_offset) {
2455 case 0:
2456 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2459 NUM_BANKS(ADDR_SURF_16_BANK));
2460 break;
2461 case 1:
2462 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2465 NUM_BANKS(ADDR_SURF_16_BANK));
2466 break;
2467 case 2:
2468 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471 NUM_BANKS(ADDR_SURF_16_BANK));
2472 break;
2473 case 3:
2474 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477 NUM_BANKS(ADDR_SURF_16_BANK));
2478 break;
2479 case 4:
2480 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2483 NUM_BANKS(ADDR_SURF_16_BANK));
2484 break;
2485 case 5:
2486 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489 NUM_BANKS(ADDR_SURF_8_BANK));
2490 break;
2491 case 6:
2492 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495 NUM_BANKS(ADDR_SURF_4_BANK));
2496 break;
2497 case 8:
2498 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2501 NUM_BANKS(ADDR_SURF_16_BANK));
2502 break;
2503 case 9:
2504 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2507 NUM_BANKS(ADDR_SURF_16_BANK));
2508 break;
2509 case 10:
2510 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2513 NUM_BANKS(ADDR_SURF_16_BANK));
2514 break;
2515 case 11:
2516 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2519 NUM_BANKS(ADDR_SURF_16_BANK));
2520 break;
2521 case 12:
2522 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525 NUM_BANKS(ADDR_SURF_16_BANK));
2526 break;
2527 case 13:
2528 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2531 NUM_BANKS(ADDR_SURF_8_BANK));
2532 break;
2533 case 14:
2534 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2536 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2537 NUM_BANKS(ADDR_SURF_4_BANK));
2538 break;
2539 default:
2540 gb_tile_moden = 0;
2541 break;
2542 }
2543 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2544 }
2545 } else if (num_pipe_configs == 2) {
2546 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2547 switch (reg_offset) {
2548 case 0:
2549 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 PIPE_CONFIG(ADDR_SURF_P2) |
2552 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2553 break;
2554 case 1:
2555 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 PIPE_CONFIG(ADDR_SURF_P2) |
2558 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2559 break;
2560 case 2:
2561 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 PIPE_CONFIG(ADDR_SURF_P2) |
2564 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2565 break;
2566 case 3:
2567 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2569 PIPE_CONFIG(ADDR_SURF_P2) |
2570 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2571 break;
2572 case 4:
2573 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2575 PIPE_CONFIG(ADDR_SURF_P2) |
2576 TILE_SPLIT(split_equal_to_row_size));
2577 break;
2578 case 5:
2579 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2580 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2581 break;
2582 case 6:
2583 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2585 PIPE_CONFIG(ADDR_SURF_P2) |
2586 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2587 break;
2588 case 7:
2589 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2590 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2591 PIPE_CONFIG(ADDR_SURF_P2) |
2592 TILE_SPLIT(split_equal_to_row_size));
2593 break;
2594 case 8:
2595 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2596 break;
2597 case 9:
2598 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2600 break;
2601 case 10:
2602 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604 PIPE_CONFIG(ADDR_SURF_P2) |
2605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606 break;
2607 case 11:
2608 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2609 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2610 PIPE_CONFIG(ADDR_SURF_P2) |
2611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2612 break;
2613 case 12:
2614 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616 PIPE_CONFIG(ADDR_SURF_P2) |
2617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618 break;
2619 case 13:
2620 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2621 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2622 break;
2623 case 14:
2624 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2626 PIPE_CONFIG(ADDR_SURF_P2) |
2627 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628 break;
2629 case 16:
2630 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632 PIPE_CONFIG(ADDR_SURF_P2) |
2633 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634 break;
2635 case 17:
2636 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2637 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2638 PIPE_CONFIG(ADDR_SURF_P2) |
2639 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2640 break;
2641 case 27:
2642 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2644 break;
2645 case 28:
2646 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648 PIPE_CONFIG(ADDR_SURF_P2) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 break;
2651 case 29:
2652 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654 PIPE_CONFIG(ADDR_SURF_P2) |
2655 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656 break;
2657 case 30:
2658 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2660 PIPE_CONFIG(ADDR_SURF_P2) |
2661 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2662 break;
2663 default:
2664 gb_tile_moden = 0;
2665 break;
2666 }
39aee490 2667 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
8cc1a532
AD
2668 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2669 }
2670 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2671 switch (reg_offset) {
2672 case 0:
2673 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2674 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2676 NUM_BANKS(ADDR_SURF_16_BANK));
2677 break;
2678 case 1:
2679 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2680 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2681 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2682 NUM_BANKS(ADDR_SURF_16_BANK));
2683 break;
2684 case 2:
2685 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2687 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2688 NUM_BANKS(ADDR_SURF_16_BANK));
2689 break;
2690 case 3:
2691 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2694 NUM_BANKS(ADDR_SURF_16_BANK));
2695 break;
2696 case 4:
2697 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2700 NUM_BANKS(ADDR_SURF_16_BANK));
2701 break;
2702 case 5:
2703 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2706 NUM_BANKS(ADDR_SURF_16_BANK));
2707 break;
2708 case 6:
2709 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2712 NUM_BANKS(ADDR_SURF_8_BANK));
2713 break;
2714 case 8:
2715 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2716 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2717 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718 NUM_BANKS(ADDR_SURF_16_BANK));
2719 break;
2720 case 9:
2721 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2722 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2723 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2724 NUM_BANKS(ADDR_SURF_16_BANK));
2725 break;
2726 case 10:
2727 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2728 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2729 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730 NUM_BANKS(ADDR_SURF_16_BANK));
2731 break;
2732 case 11:
2733 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2736 NUM_BANKS(ADDR_SURF_16_BANK));
2737 break;
2738 case 12:
2739 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2741 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742 NUM_BANKS(ADDR_SURF_16_BANK));
2743 break;
2744 case 13:
2745 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2748 NUM_BANKS(ADDR_SURF_16_BANK));
2749 break;
2750 case 14:
2751 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754 NUM_BANKS(ADDR_SURF_8_BANK));
2755 break;
2756 default:
2757 gb_tile_moden = 0;
2758 break;
2759 }
2760 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2761 }
2762 } else
2763 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2764}
2765
2766/**
2767 * cik_select_se_sh - select which SE, SH to address
2768 *
2769 * @rdev: radeon_device pointer
2770 * @se_num: shader engine to address
2771 * @sh_num: sh block to address
2772 *
2773 * Select which SE, SH combinations to address. Certain
2774 * registers are instanced per SE or SH. 0xffffffff means
2775 * broadcast to all SEs or SHs (CIK).
2776 */
2777static void cik_select_se_sh(struct radeon_device *rdev,
2778 u32 se_num, u32 sh_num)
2779{
2780 u32 data = INSTANCE_BROADCAST_WRITES;
2781
2782 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
b0fe3d39 2783 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
8cc1a532
AD
2784 else if (se_num == 0xffffffff)
2785 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2786 else if (sh_num == 0xffffffff)
2787 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2788 else
2789 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2790 WREG32(GRBM_GFX_INDEX, data);
2791}
2792
2793/**
2794 * cik_create_bitmask - create a bitmask
2795 *
2796 * @bit_width: length of the mask
2797 *
2798 * create a variable length bit mask (CIK).
2799 * Returns the bitmask.
2800 */
2801static u32 cik_create_bitmask(u32 bit_width)
2802{
2803 u32 i, mask = 0;
2804
2805 for (i = 0; i < bit_width; i++) {
2806 mask <<= 1;
2807 mask |= 1;
2808 }
2809 return mask;
2810}
2811
2812/**
2813 * cik_get_rb_disabled - computes the mask of disabled RBs
2814 *
2815 * @rdev: radeon_device pointer
2816 * @max_rb_num: max RBs (render backends) for the asic
2817 * @se_num: number of SEs (shader engines) for the asic
2818 * @sh_per_se: number of SH blocks per SE for the asic
2819 *
2820 * Calculates the bitmask of disabled RBs (CIK).
2821 * Returns the disabled RB bitmask.
2822 */
2823static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2824 u32 max_rb_num, u32 se_num,
2825 u32 sh_per_se)
2826{
2827 u32 data, mask;
2828
2829 data = RREG32(CC_RB_BACKEND_DISABLE);
2830 if (data & 1)
2831 data &= BACKEND_DISABLE_MASK;
2832 else
2833 data = 0;
2834 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2835
2836 data >>= BACKEND_DISABLE_SHIFT;
2837
2838 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2839
2840 return data & mask;
2841}
2842
2843/**
2844 * cik_setup_rb - setup the RBs on the asic
2845 *
2846 * @rdev: radeon_device pointer
2847 * @se_num: number of SEs (shader engines) for the asic
2848 * @sh_per_se: number of SH blocks per SE for the asic
2849 * @max_rb_num: max RBs (render backends) for the asic
2850 *
2851 * Configures per-SE/SH RB registers (CIK).
2852 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH pair and pack each one's disabled-RB bits into
	 * a single bitmask, CIK_RB_BITMAP_WIDTH_PER_SH bits per SH. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast so subsequent writes reach all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert into an enabled-RB mask over the first max_rb_num RBs. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Program PA_SC_RASTER_CONFIG per SE.  Note enabled_rbs is consumed
	 * two bits at a time and the shift carries across SE iterations, so
	 * each SE sees its own slice of the global enable mask. */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				/* only the first RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair is enabled;
				 * NOTE(review): maps to RB_MAP_3 (not _1) —
				 * presumably intentional hw mapping, confirm
				 * against the CIK register spec */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled (also used as fallback) */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* leave the selector in broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2900
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-ASIC shader topology limits, scan-converter FIFO sizes and the
	 * golden GB_ADDR_CONFIG value for each CIK family member.
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs differ in CU count and render-backend count;
		 * distinguish them by PCI device ID.
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the number of column bits,
	 * then clamp to the 4KB maximum the tiling config supports */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword. gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0 num_pipes
	 * bits 7:4 num_banks
	 * bits 11:8 group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into all the clients that need it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	/* both SDMA engines use the same register offset within their apertures */
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* program the scan converter FIFO sizes chosen per-ASIC above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the writes above settle before the engine is used */
	udelay(50);
}
3164
2cae3bc3
AD
3165/*
3166 * GPU scratch registers helpers function.
3167 */
3168/**
3169 * cik_scratch_init - setup driver info for CP scratch regs
3170 *
3171 * @rdev: radeon_device pointer
3172 *
3173 * Set up the number and offset of the CP scratch registers.
3174 * NOTE: use of CP scratch registers is a legacy inferface and
3175 * is not used by default on newer asics (r6xx+). On newer asics,
3176 * memory buffers are used for fences rather than scratch regs.
3177 */
3178static void cik_scratch_init(struct radeon_device *rdev)
3179{
3180 int i;
3181
3182 rdev->scratch.num_reg = 7;
3183 rdev->scratch.reg_base = SCRATCH_REG0;
3184 for (i = 0; i < rdev->scratch.num_reg; i++) {
3185 rdev->scratch.free[i] = true;
3186 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3187 }
3188}
3189
fbc832c7
AD
3190/**
3191 * cik_ring_test - basic gfx ring test
3192 *
3193 * @rdev: radeon_device pointer
3194 * @ring: radeon_ring structure holding ring information
3195 *
3196 * Allocate a scratch register and write to it using the gfx ring (CIK).
3197 * Provides a basic gfx ring test to verify that the ring is working.
3198 * Used by cik_cp_gfx_resume();
3199 * Returns 0 on success, error on failure.
3200 */
3201int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3202{
3203 uint32_t scratch;
3204 uint32_t tmp = 0;
3205 unsigned i;
3206 int r;
3207
3208 r = radeon_scratch_get(rdev, &scratch);
3209 if (r) {
3210 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3211 return r;
3212 }
3213 WREG32(scratch, 0xCAFEDEAD);
3214 r = radeon_ring_lock(rdev, ring, 3);
3215 if (r) {
3216 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3217 radeon_scratch_free(rdev, scratch);
3218 return r;
3219 }
3220 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3221 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3222 radeon_ring_write(ring, 0xDEADBEEF);
3223 radeon_ring_unlock_commit(rdev, ring);
963e81f9 3224
fbc832c7
AD
3225 for (i = 0; i < rdev->usec_timeout; i++) {
3226 tmp = RREG32(scratch);
3227 if (tmp == 0xDEADBEEF)
3228 break;
3229 DRM_UDELAY(1);
3230 }
3231 if (i < rdev->usec_timeout) {
3232 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3233 } else {
3234 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3235 ring->idx, scratch, tmp);
3236 r = -EINVAL;
3237 }
3238 radeon_scratch_free(rdev, scratch);
3239 return r;
3240}
3241
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* fence address must be 4-byte aligned; low bits are dropped */
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1) = write the 32-bit seq, INT_SEL(2) = irq on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3278
b07fdd38
AD
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int
	 * (compute rings use RELEASE_MEM instead of the gfx ring's
	 * EVENT_WRITE_EOP; note the different address/select dword order)
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1) = write the 32-bit seq, INT_SEL(2) = irq on write */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3316
2cae3bc3
AD
/**
 * cik_semaphore_ring_emit - emit a semaphore signal/wait on a ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring to emit the packet on
 * @semaphore: semaphore object
 * @emit_wait: true to wait on the semaphore, false to signal it
 *
 * Emits a MEM_SEMAPHORE packet that either signals the semaphore
 * or waits on it, depending on @emit_wait.
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* high address bits share this dword with the wait/signal select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
3329
c9dbd705
AD
3330/**
3331 * cik_copy_cpdma - copy pages using the CP DMA engine
3332 *
3333 * @rdev: radeon_device pointer
3334 * @src_offset: src GPU address
3335 * @dst_offset: dst GPU address
3336 * @num_gpu_pages: number of GPU pages to xfer
3337 * @fence: radeon fence object
3338 *
3339 * Copy GPU paging using the CP DMA engine (CIK+).
3340 * Used by the radeon ttm implementation to move pages if
3341 * registered as the asic copy callback.
3342 */
3343int cik_copy_cpdma(struct radeon_device *rdev,
3344 uint64_t src_offset, uint64_t dst_offset,
3345 unsigned num_gpu_pages,
3346 struct radeon_fence **fence)
3347{
3348 struct radeon_semaphore *sem = NULL;
3349 int ring_index = rdev->asic->copy.blit_ring_index;
3350 struct radeon_ring *ring = &rdev->ring[ring_index];
3351 u32 size_in_bytes, cur_size_in_bytes, control;
3352 int i, num_loops;
3353 int r = 0;
3354
3355 r = radeon_semaphore_create(rdev, &sem);
3356 if (r) {
3357 DRM_ERROR("radeon: moving bo (%d).\n", r);
3358 return r;
3359 }
3360
3361 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3362 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3363 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3364 if (r) {
3365 DRM_ERROR("radeon: moving bo (%d).\n", r);
3366 radeon_semaphore_free(rdev, &sem, NULL);
3367 return r;
3368 }
3369
3370 if (radeon_fence_need_sync(*fence, ring->idx)) {
3371 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3372 ring->idx);
3373 radeon_fence_note_sync(*fence, ring->idx);
3374 } else {
3375 radeon_semaphore_free(rdev, &sem, NULL);
3376 }
3377
3378 for (i = 0; i < num_loops; i++) {
3379 cur_size_in_bytes = size_in_bytes;
3380 if (cur_size_in_bytes > 0x1fffff)
3381 cur_size_in_bytes = 0x1fffff;
3382 size_in_bytes -= cur_size_in_bytes;
3383 control = 0;
3384 if (size_in_bytes == 0)
3385 control |= PACKET3_DMA_DATA_CP_SYNC;
3386 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3387 radeon_ring_write(ring, control);
3388 radeon_ring_write(ring, lower_32_bits(src_offset));
3389 radeon_ring_write(ring, upper_32_bits(src_offset));
3390 radeon_ring_write(ring, lower_32_bits(dst_offset));
3391 radeon_ring_write(ring, upper_32_bits(dst_offset));
3392 radeon_ring_write(ring, cur_size_in_bytes);
3393 src_offset += cur_size_in_bytes;
3394 dst_offset += cur_size_in_bytes;
3395 }
3396
3397 r = radeon_fence_emit(rdev, fence, ring->idx);
3398 if (r) {
3399 radeon_ring_unlock_undo(rdev, ring);
3400 return r;
3401 }
3402
3403 radeon_ring_unlock_commit(rdev, ring);
3404 radeon_semaphore_free(rdev, &sem, *fence);
3405
3406 return r;
3407}
3408
2cae3bc3
AD
/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits an DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring. IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring. This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* record where the rptr will be after this IB, either in the
		 * rptr save register or in the writeback buffer; the constant
		 * added to ring->wptr is the number of dwords emitted below */
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			/* NOTE(review): & 0xffffffff here is a no-op mask */
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VMID (bits 24+) it should run under */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3467
fbc832c7
AD
3468/**
3469 * cik_ib_test - basic gfx ring IB test
3470 *
3471 * @rdev: radeon_device pointer
3472 * @ring: radeon_ring structure holding ring information
3473 *
3474 * Allocate an IB and execute it on the gfx ring (CIK).
3475 * Provides a basic gfx ring test to verify that IBs are working.
3476 * Returns 0 on success, error on failure.
3477 */
3478int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3479{
3480 struct radeon_ib ib;
3481 uint32_t scratch;
3482 uint32_t tmp = 0;
3483 unsigned i;
3484 int r;
3485
3486 r = radeon_scratch_get(rdev, &scratch);
3487 if (r) {
3488 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3489 return r;
3490 }
3491 WREG32(scratch, 0xCAFEDEAD);
3492 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3493 if (r) {
3494 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
5510f124 3495 radeon_scratch_free(rdev, scratch);
fbc832c7
AD
3496 return r;
3497 }
3498 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3499 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3500 ib.ptr[2] = 0xDEADBEEF;
3501 ib.length_dw = 3;
3502 r = radeon_ib_schedule(rdev, &ib, NULL);
3503 if (r) {
3504 radeon_scratch_free(rdev, scratch);
3505 radeon_ib_free(rdev, &ib);
3506 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3507 return r;
3508 }
3509 r = radeon_fence_wait(ib.fence, false);
3510 if (r) {
3511 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
5510f124
CK
3512 radeon_scratch_free(rdev, scratch);
3513 radeon_ib_free(rdev, &ib);
fbc832c7
AD
3514 return r;
3515 }
3516 for (i = 0; i < rdev->usec_timeout; i++) {
3517 tmp = RREG32(scratch);
3518 if (tmp == 0xDEADBEEF)
3519 break;
3520 DRM_UDELAY(1);
3521 }
3522 if (i < rdev->usec_timeout) {
3523 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3524 } else {
3525 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3526 scratch, tmp);
3527 r = -EINVAL;
3528 }
3529 radeon_scratch_free(rdev, scratch);
3530 radeon_ib_free(rdev, &ib);
3531 return r;
3532}
3533
841cf442
AD
3534/*
3535 * CP.
 3536 * On CIK, gfx and compute now have independent command processors.
3537 *
3538 * GFX
3539 * Gfx consists of a single ring and can process both gfx jobs and
3540 * compute jobs. The gfx CP consists of three microengines (ME):
3541 * PFP - Pre-Fetch Parser
3542 * ME - Micro Engine
3543 * CE - Constant Engine
3544 * The PFP and ME make up what is considered the Drawing Engine (DE).
 3545 * The CE is an asynchronous engine used for updating buffer descriptors
3546 * used by the DE so that they can be loaded into cache in parallel
3547 * while the DE is processing state update packets.
3548 *
3549 * Compute
3550 * The compute CP consists of two microengines (ME):
3551 * MEC1 - Compute MicroEngine 1
3552 * MEC2 - Compute MicroEngine 2
3553 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3554 * The queues are exposed to userspace and are programmed directly
3555 * by the compute runtime.
3556 */
3557/**
3558 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3559 *
3560 * @rdev: radeon_device pointer
3561 * @enable: enable or disable the MEs
3562 *
3563 * Halts or unhalts the gfx MEs.
3564 */
3565static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3566{
3567 if (enable)
3568 WREG32(CP_ME_CNTL, 0);
3569 else {
3570 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3571 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3572 }
3573 udelay(50);
3574}
3575
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* each engine's ucode RAM is loaded by resetting the address
	 * register, streaming big-endian words into the data register,
	 * then resetting the address again */
	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3621
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve room for the clear-state dwords plus 17 setup dwords below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions. CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden context state (clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3682
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs before freeing the ring backing store */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3696
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	/* this register does not exist on Hawaii */
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in units of 8 dwords) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* clear RB_RPTR_WR_ENA again now that the pointers are initialized */
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3770
963e81f9
AD
3771u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3772 struct radeon_ring *ring)
3773{
3774 u32 rptr;
3775
3776
3777
3778 if (rdev->wb.enabled) {
3779 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3780 } else {
f61d5b46 3781 mutex_lock(&rdev->srbm_mutex);
963e81f9
AD
3782 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3783 rptr = RREG32(CP_HQD_PQ_RPTR);
3784 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 3785 mutex_unlock(&rdev->srbm_mutex);
963e81f9 3786 }
963e81f9
AD
3787
3788 return rptr;
3789}
3790
3791u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3792 struct radeon_ring *ring)
3793{
3794 u32 wptr;
3795
3796 if (rdev->wb.enabled) {
3797 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3798 } else {
f61d5b46 3799 mutex_lock(&rdev->srbm_mutex);
963e81f9
AD
3800 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3801 wptr = RREG32(CP_HQD_PQ_WPTR);
3802 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 3803 mutex_unlock(&rdev->srbm_mutex);
963e81f9 3804 }
963e81f9
AD
3805
3806 return wptr;
3807}
3808
/**
 * cik_compute_ring_set_wptr - commit the write pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: compute ring to update
 *
 * Mirrors the new write pointer into the writeback buffer and rings
 * the queue's doorbell to notify the CP.
 */
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3815
841cf442
AD
3816/**
3817 * cik_cp_compute_enable - enable/disable the compute CP MEs
3818 *
3819 * @rdev: radeon_device pointer
3820 * @enable: enable or disable the MEs
3821 *
3822 * Halts or unhalts the compute MEs.
3823 */
3824static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3825{
3826 if (enable)
3827 WREG32(CP_MEC_CNTL, 0);
3828 else
3829 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3830 udelay(50);
3831}
3832
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	/* only Kaveri has a second MEC; both are loaded with the same image */
	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
3869
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* nothing to program here besides unhalting the MECs; the queues
	 * themselves are configured elsewhere */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
3884
3885/**
3886 * cik_cp_compute_fini - stop the compute queues
3887 *
3888 * @rdev: radeon_device pointer
3889 *
3890 * Stop the compute queues and tear down the driver queue
3891 * info.
3892 */
3893static void cik_cp_compute_fini(struct radeon_device *rdev)
3894{
963e81f9
AD
3895 int i, idx, r;
3896
841cf442 3897 cik_cp_compute_enable(rdev, false);
963e81f9
AD
3898
3899 for (i = 0; i < 2; i++) {
3900 if (i == 0)
3901 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3902 else
3903 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3904
3905 if (rdev->ring[idx].mqd_obj) {
3906 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3907 if (unlikely(r != 0))
3908 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3909
3910 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3911 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3912
3913 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3914 rdev->ring[idx].mqd_obj = NULL;
3915 }
3916 }
841cf442
AD
3917}
3918
963e81f9
AD
3919static void cik_mec_fini(struct radeon_device *rdev)
3920{
3921 int r;
3922
3923 if (rdev->mec.hpd_eop_obj) {
3924 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3925 if (unlikely(r != 0))
3926 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3927 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3928 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3929
3930 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3931 rdev->mec.hpd_eop_obj = NULL;
3932 }
3933}
3934
3935#define MEC_HPD_SIZE 2048
3936
3937static int cik_mec_init(struct radeon_device *rdev)
3938{
3939 int r;
3940 u32 *hpd;
3941
3942 /*
3943 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3944 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3945 */
3946 if (rdev->family == CHIP_KAVERI)
3947 rdev->mec.num_mec = 2;
3948 else
3949 rdev->mec.num_mec = 1;
3950 rdev->mec.num_pipe = 4;
3951 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3952
3953 if (rdev->mec.hpd_eop_obj == NULL) {
3954 r = radeon_bo_create(rdev,
3955 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3956 PAGE_SIZE, true,
3957 RADEON_GEM_DOMAIN_GTT, NULL,
3958 &rdev->mec.hpd_eop_obj);
3959 if (r) {
3960 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3961 return r;
3962 }
3963 }
3964
3965 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3966 if (unlikely(r != 0)) {
3967 cik_mec_fini(rdev);
3968 return r;
3969 }
3970 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3971 &rdev->mec.hpd_eop_gpu_addr);
3972 if (r) {
3973 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3974 cik_mec_fini(rdev);
3975 return r;
3976 }
3977 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3978 if (r) {
3979 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3980 cik_mec_fini(rdev);
3981 return r;
3982 }
3983
3984 /* clear memory. Not sure if this is required or not */
3985 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3986
3987 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3988 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3989
3990 return 0;
3991}
3992
/* Per-queue HQD register state stored inside the MQD.  Field names
 * correspond to the CP_MQD_x / CP_HQD_x registers programmed in
 * cik_cp_compute_resume(); the layout is consumed by the CP microcode,
 * so do not reorder or resize fields.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4031
/* Memory Queue Descriptor for Bonaire-class compute queues.  One MQD
 * per compute ring is allocated in GTT and filled in by
 * cik_cp_compute_resume() (header 0xC0310800).  The layout is defined
 * by the CP microcode — do not reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* saved/programmed HQD registers */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4059
841cf442
AD
4060/**
4061 * cik_cp_compute_resume - setup the compute queue registers
4062 *
4063 * @rdev: radeon_device pointer
4064 *
4065 * Program the compute queues and test them to make sure they
4066 * are working.
4067 * Returns 0 for success, error for failure.
4068 */
4069static int cik_cp_compute_resume(struct radeon_device *rdev)
4070{
963e81f9
AD
4071 int r, i, idx;
4072 u32 tmp;
4073 bool use_doorbell = true;
4074 u64 hqd_gpu_addr;
4075 u64 mqd_gpu_addr;
4076 u64 eop_gpu_addr;
4077 u64 wb_gpu_addr;
4078 u32 *buf;
4079 struct bonaire_mqd *mqd;
841cf442 4080
841cf442
AD
4081 r = cik_cp_compute_start(rdev);
4082 if (r)
4083 return r;
963e81f9
AD
4084
4085 /* fix up chicken bits */
4086 tmp = RREG32(CP_CPF_DEBUG);
4087 tmp |= (1 << 23);
4088 WREG32(CP_CPF_DEBUG, tmp);
4089
4090 /* init the pipes */
f61d5b46 4091 mutex_lock(&rdev->srbm_mutex);
963e81f9
AD
4092 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4093 int me = (i < 4) ? 1 : 2;
4094 int pipe = (i < 4) ? i : (i - 4);
4095
4096 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4097
4098 cik_srbm_select(rdev, me, pipe, 0, 0);
4099
4100 /* write the EOP addr */
4101 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4102 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4103
4104 /* set the VMID assigned */
4105 WREG32(CP_HPD_EOP_VMID, 0);
4106
4107 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4108 tmp = RREG32(CP_HPD_EOP_CONTROL);
4109 tmp &= ~EOP_SIZE_MASK;
b72a8925 4110 tmp |= order_base_2(MEC_HPD_SIZE / 8);
963e81f9
AD
4111 WREG32(CP_HPD_EOP_CONTROL, tmp);
4112 }
4113 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 4114 mutex_unlock(&rdev->srbm_mutex);
963e81f9
AD
4115
4116 /* init the queues. Just two for now. */
4117 for (i = 0; i < 2; i++) {
4118 if (i == 0)
4119 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4120 else
4121 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4122
4123 if (rdev->ring[idx].mqd_obj == NULL) {
4124 r = radeon_bo_create(rdev,
4125 sizeof(struct bonaire_mqd),
4126 PAGE_SIZE, true,
4127 RADEON_GEM_DOMAIN_GTT, NULL,
4128 &rdev->ring[idx].mqd_obj);
4129 if (r) {
4130 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4131 return r;
4132 }
4133 }
4134
4135 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4136 if (unlikely(r != 0)) {
4137 cik_cp_compute_fini(rdev);
4138 return r;
4139 }
4140 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4141 &mqd_gpu_addr);
4142 if (r) {
4143 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4144 cik_cp_compute_fini(rdev);
4145 return r;
4146 }
4147 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4148 if (r) {
4149 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4150 cik_cp_compute_fini(rdev);
4151 return r;
4152 }
4153
4154 /* doorbell offset */
4155 rdev->ring[idx].doorbell_offset =
4156 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
4157
4158 /* init the mqd struct */
4159 memset(buf, 0, sizeof(struct bonaire_mqd));
4160
4161 mqd = (struct bonaire_mqd *)buf;
4162 mqd->header = 0xC0310800;
4163 mqd->static_thread_mgmt01[0] = 0xffffffff;
4164 mqd->static_thread_mgmt01[1] = 0xffffffff;
4165 mqd->static_thread_mgmt23[0] = 0xffffffff;
4166 mqd->static_thread_mgmt23[1] = 0xffffffff;
4167
f61d5b46 4168 mutex_lock(&rdev->srbm_mutex);
963e81f9
AD
4169 cik_srbm_select(rdev, rdev->ring[idx].me,
4170 rdev->ring[idx].pipe,
4171 rdev->ring[idx].queue, 0);
4172
4173 /* disable wptr polling */
4174 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4175 tmp &= ~WPTR_POLL_EN;
4176 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4177
4178 /* enable doorbell? */
4179 mqd->queue_state.cp_hqd_pq_doorbell_control =
4180 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4181 if (use_doorbell)
4182 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4183 else
4184 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4185 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4186 mqd->queue_state.cp_hqd_pq_doorbell_control);
4187
4188 /* disable the queue if it's active */
4189 mqd->queue_state.cp_hqd_dequeue_request = 0;
4190 mqd->queue_state.cp_hqd_pq_rptr = 0;
4191 mqd->queue_state.cp_hqd_pq_wptr= 0;
4192 if (RREG32(CP_HQD_ACTIVE) & 1) {
4193 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4194 for (i = 0; i < rdev->usec_timeout; i++) {
4195 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4196 break;
4197 udelay(1);
4198 }
4199 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4200 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4201 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4202 }
4203
4204 /* set the pointer to the MQD */
4205 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4206 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4207 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4208 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4209 /* set MQD vmid to 0 */
4210 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4211 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4212 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4213
4214 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4215 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4216 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4217 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4218 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4219 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4220
4221 /* set up the HQD, this is similar to CP_RB0_CNTL */
4222 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4223 mqd->queue_state.cp_hqd_pq_control &=
4224 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4225
4226 mqd->queue_state.cp_hqd_pq_control |=
b72a8925 4227 order_base_2(rdev->ring[idx].ring_size / 8);
963e81f9 4228 mqd->queue_state.cp_hqd_pq_control |=
b72a8925 4229 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
963e81f9
AD
4230#ifdef __BIG_ENDIAN
4231 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4232#endif
4233 mqd->queue_state.cp_hqd_pq_control &=
4234 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4235 mqd->queue_state.cp_hqd_pq_control |=
4236 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4237 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4238
4239 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4240 if (i == 0)
4241 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4242 else
4243 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4244 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4245 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4246 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4247 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4248 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4249
4250 /* set the wb address wether it's enabled or not */
4251 if (i == 0)
4252 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4253 else
4254 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4255 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4256 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4257 upper_32_bits(wb_gpu_addr) & 0xffff;
4258 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4259 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4260 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4261 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4262
4263 /* enable the doorbell if requested */
4264 if (use_doorbell) {
4265 mqd->queue_state.cp_hqd_pq_doorbell_control =
4266 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4267 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4268 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4269 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
4270 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4271 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4272 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4273
4274 } else {
4275 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4276 }
4277 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4278 mqd->queue_state.cp_hqd_pq_doorbell_control);
4279
4280 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4281 rdev->ring[idx].wptr = 0;
4282 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4283 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4284 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4285 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4286
4287 /* set the vmid for the queue */
4288 mqd->queue_state.cp_hqd_vmid = 0;
4289 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4290
4291 /* activate the queue */
4292 mqd->queue_state.cp_hqd_active = 1;
4293 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4294
4295 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 4296 mutex_unlock(&rdev->srbm_mutex);
963e81f9
AD
4297
4298 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4299 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4300
4301 rdev->ring[idx].ready = true;
4302 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4303 if (r)
4304 rdev->ring[idx].ready = false;
4305 }
4306
841cf442
AD
4307 return 0;
4308}
4309
841cf442
AD
4310static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4311{
4312 cik_cp_gfx_enable(rdev, enable);
4313 cik_cp_compute_enable(rdev, enable);
4314}
4315
841cf442
AD
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Returns 0 for success, error for failure; stops at the first
 * failing loader.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret == 0)
		ret = cik_cp_compute_load_microcode(rdev);
	return ret;
}
4329
841cf442
AD
/**
 * cik_cp_fini - tear down both CP blocks
 *
 * @rdev: radeon_device pointer
 *
 * Stops the gfx CP and the compute CP and frees their resources.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4335
841cf442
AD
4336static int cik_cp_resume(struct radeon_device *rdev)
4337{
4338 int r;
4339
4214faf6
AD
4340 cik_enable_gui_idle_interrupt(rdev, false);
4341
841cf442
AD
4342 r = cik_cp_load_microcode(rdev);
4343 if (r)
4344 return r;
4345
4346 r = cik_cp_gfx_resume(rdev);
4347 if (r)
4348 return r;
4349 r = cik_cp_compute_resume(rdev);
4350 if (r)
4351 return r;
4352
4214faf6
AD
4353 cik_enable_gui_idle_interrupt(rdev, true);
4354
841cf442
AD
4355 return 0;
4356}
4357
/* Dump the GRBM, SRBM, SDMA and CP status/stall registers to the
 * kernel log.  Called from the soft reset path for debugging.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
6f2043ce 4397
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 - the SDMA engines are checked both via their own
	 * status registers above and via the SRBM view here */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4478
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * Sequence: dump state, halt CG/PG/RLC/CP/MEC/SDMA, stop the MC,
 * pulse the GRBM/SRBM soft reset bits, then resume the MC.  The
 * ordering of these steps matters — do not reorder.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset only on dGPUs; IGPs share the MC with the CPU */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		/* NOTE(review): read back after each write — presumably to
		 * post the write before the delay; confirm against hw docs */
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
4609
4610/**
cc066715 4611 * cik_asic_reset - soft reset GPU
21a93e13
AD
4612 *
4613 * @rdev: radeon_device pointer
4614 *
cc066715
AD
4615 * Look up which blocks are hung and attempt
4616 * to reset them.
6f2043ce 4617 * Returns 0 for success.
21a93e13 4618 */
6f2043ce 4619int cik_asic_reset(struct radeon_device *rdev)
21a93e13 4620{
cc066715 4621 u32 reset_mask;
21a93e13 4622
cc066715 4623 reset_mask = cik_gpu_check_soft_reset(rdev);
21a93e13 4624
cc066715
AD
4625 if (reset_mask)
4626 r600_set_bios_scratch_engine_hung(rdev, true);
21a93e13 4627
cc066715 4628 cik_gpu_soft_reset(rdev, reset_mask);
21a93e13 4629
cc066715
AD
4630 reset_mask = cik_gpu_check_soft_reset(rdev);
4631
4632 if (!reset_mask)
4633 r600_set_bios_scratch_engine_hung(rdev, false);
21a93e13
AD
4634
4635 return 0;
4636}
4637
4638/**
cc066715 4639 * cik_gfx_is_lockup - check if the 3D engine is locked up
21a93e13
AD
4640 *
4641 * @rdev: radeon_device pointer
cc066715 4642 * @ring: radeon_ring structure holding ring information
21a93e13 4643 *
cc066715
AD
4644 * Check if the 3D engine is locked up (CIK).
4645 * Returns true if the engine is locked, false if not.
21a93e13 4646 */
cc066715 4647bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
21a93e13 4648{
cc066715 4649 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
21a93e13 4650
cc066715
AD
4651 if (!(reset_mask & (RADEON_RESET_GFX |
4652 RADEON_RESET_COMPUTE |
4653 RADEON_RESET_CP))) {
4654 radeon_ring_lockup_update(ring);
4655 return false;
21a93e13 4656 }
cc066715
AD
4657 /* force CP activities */
4658 radeon_ring_force_activity(rdev, ring);
4659 return radeon_ring_test_lockup(rdev, ring);
21a93e13
AD
4660}
4661
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).  The MC is stopped around the
 * aperture reprogramming and resumed afterwards.
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top half = end, bottom half = start (16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture: base 0, TOP == BOT (no usable range) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
4718
4719/**
1c49165d 4720 * cik_mc_init - initialize the memory controller driver params
21a93e13
AD
4721 *
4722 * @rdev: radeon_device pointer
21a93e13 4723 *
1c49165d
AD
4724 * Look up the amount of vram, vram width, and decide how to place
4725 * vram and gart within the GPU's physical address space (CIK).
4726 * Returns 0 for success.
21a93e13 4727 */
1c49165d 4728static int cik_mc_init(struct radeon_device *rdev)
21a93e13 4729{
1c49165d
AD
4730 u32 tmp;
4731 int chansize, numchan;
21a93e13 4732
1c49165d
AD
4733 /* Get VRAM informations */
4734 rdev->mc.vram_is_ddr = true;
4735 tmp = RREG32(MC_ARB_RAMCFG);
4736 if (tmp & CHANSIZE_MASK) {
4737 chansize = 64;
21a93e13 4738 } else {
1c49165d 4739 chansize = 32;
21a93e13 4740 }
1c49165d
AD
4741 tmp = RREG32(MC_SHARED_CHMAP);
4742 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4743 case 0:
4744 default:
4745 numchan = 1;
4746 break;
4747 case 1:
4748 numchan = 2;
4749 break;
4750 case 2:
4751 numchan = 4;
4752 break;
4753 case 3:
4754 numchan = 8;
4755 break;
4756 case 4:
4757 numchan = 3;
4758 break;
4759 case 5:
4760 numchan = 6;
4761 break;
4762 case 6:
4763 numchan = 10;
4764 break;
4765 case 7:
4766 numchan = 12;
4767 break;
4768 case 8:
4769 numchan = 16;
4770 break;
4771 }
4772 rdev->mc.vram_width = numchan * chansize;
4773 /* Could aper size report 0 ? */
4774 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4775 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4776 /* size in MB on si */
13c5bfda
AD
4777 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4778 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
1c49165d
AD
4779 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4780 si_vram_gtt_location(rdev, &rdev->mc);
4781 radeon_update_bandwidth_info(rdev);
4782
4783 return 0;
4784}
4785
/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only bit 0 (VMID0) is set */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
4807
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 (kernel GART): flat page table over the GTT range */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, zeroed here — presumably
	 * identity-aperture related; confirm against hw docs */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4943
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* unpin the page table BO now that the hw no longer walks it */
	radeon_gart_table_vram_unpin(rdev);
}
4971
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable first so the hw stops referencing the table,
	 * then free the table BO and the gart bookkeeping */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4985
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Returns 0 (always succeeds).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
4999
5000/*
5001 * vm
5002 * VMID 0 is the physical GPU addresses as used by the kernel.
5003 * VMIDs 1-15 are used for userspace clients and are handled
5004 * by the radeon vm/hsa code.
5005 */
5006/**
5007 * cik_vm_init - cik vm init callback
5008 *
5009 * @rdev: radeon_device pointer
5010 *
5011 * Inits cik specific vm parameters (number of VMs, base of vram for
5012 * VMIDs 1-15) (CIK).
5013 * Returns 0 for success.
5014 */
5015int cik_vm_init(struct radeon_device *rdev)
5016{
5017 /* number of VMs */
5018 rdev->vm_manager.nvm = 16;
5019 /* base offset of vram pages */
5020 if (rdev->flags & RADEON_IS_IGP) {
5021 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5022 tmp <<= 22;
5023 rdev->vm_manager.vram_base_offset = tmp;
5024 } else
5025 rdev->vm_manager.vram_base_offset = 0;
5026
5027 return 0;
5028}
5029
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Intentionally empty: CIK has no per-asic VM state to tear down.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5040
3ec7d11b
AD
5041/**
5042 * cik_vm_decode_fault - print human readable fault info
5043 *
5044 * @rdev: radeon_device pointer
5045 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5046 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5047 *
5048 * Print human readable fault information (CIK).
5049 */
5050static void cik_vm_decode_fault(struct radeon_device *rdev,
5051 u32 status, u32 addr, u32 mc_client)
5052{
939c0d3c 5053 u32 mc_id;
3ec7d11b
AD
5054 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5055 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
328a50c7
MD
5056 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5057 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
3ec7d11b 5058
939c0d3c
AD
5059 if (rdev->family == CHIP_HAWAII)
5060 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5061 else
5062 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5063
328a50c7 5064 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
3ec7d11b
AD
5065 protections, vmid, addr,
5066 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
328a50c7 5067 block, mc_client, mc_id);
3ec7d11b
AD
5068}
5069
f96ab484
AD
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: vm to flush; NULL is a no-op
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* update the page table base address for this VMID;
	 * VMIDs 0-7 and 8-15 live in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select this VMID in SRBM so the SH_MEM writes land in its bank */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 (kernel context) */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
6f2043ce 5151
f6796cae
AD
5152/*
5153 * RLC
5154 * The RLC is a multi-purpose microengine that handles a
5155 * variety of functions, the most important of which is
5156 * the interrupt controller.
5157 */
866d83de
AD
5158static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5159 bool enable)
f6796cae 5160{
866d83de 5161 u32 tmp = RREG32(CP_INT_CNTL_RING0);
f6796cae 5162
866d83de
AD
5163 if (enable)
5164 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5165 else
5166 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
f6796cae 5167 WREG32(CP_INT_CNTL_RING0, tmp);
866d83de 5168}
f6796cae 5169
866d83de 5170static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
cc066715 5171{
cc066715 5172 u32 tmp;
6f2043ce 5173
866d83de
AD
5174 tmp = RREG32(RLC_LB_CNTL);
5175 if (enable)
5176 tmp |= LOAD_BALANCE_ENABLE;
5177 else
5178 tmp &= ~LOAD_BALANCE_ENABLE;
5179 WREG32(RLC_LB_CNTL, tmp);
5180}
cc066715 5181
866d83de
AD
/* Poll until the RLC serdes masters report idle, for every SE/SH and
 * for the non-CU masters; each poll is bounded by rdev->usec_timeout
 * (a timeout is silently ignored).
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* per-SE/SH CU master busy bits */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* global (non-CU) master busy bits */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
cc066715 5206
22c775ce
AD
5207static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5208{
5209 u32 tmp;
cc066715 5210
22c775ce
AD
5211 tmp = RREG32(RLC_CNTL);
5212 if (tmp != rlc)
5213 WREG32(RLC_CNTL, rlc);
5214}
cc066715 5215
22c775ce
AD
/* Halt the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the GPM block to drain (bounded poll) */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
cc066715 5239
a412fce0
AD
/* Request RLC safe mode and wait (bounded polls) until the gfx block
 * reports powered/clocked status and the RLC has acknowledged the
 * request by clearing REQ.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* REQ clears once the RLC has consumed the message */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5260
5261void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5262{
5263 u32 tmp;
5264
5265 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5266 WREG32(RLC_GPR_REG2, tmp);
5267}
5268
866d83de
AD
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	/* no point getting gui-idle interrupts while the RLC is down */
	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}
5284
f6796cae
AD
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* brief settle delay after enabling */
	udelay(50);
}
5300
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	/* ucode size (in dwords) is per-family.
	 * NOTE(review): there is no CHIP_HAWAII case here, so Hawaii would
	 * fall through to the BONAIRE default — confirm when wiring up
	 * Hawaii support.
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		size = BONAIRE_RLC_UCODE_SIZE;
		break;
	case CHIP_KAVERI:
		size = KV_RLC_UCODE_SIZE;
		break;
	case CHIP_KABINI:
		size = KB_RLC_UCODE_SIZE;
		break;
	}

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program load-balance params, broadcast to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* load the big-endian ucode image one dword at a time */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
cc066715 5370
22c775ce
AD
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the gfx block, gated on the asic's cg_flags.
 * The RLC must be halted around the serdes writes; cik_halt_rlc()
 * returns the prior RLC_CNTL which cik_update_rlc() restores.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* read back four times — presumably to flush/settle the
		 * CB sclk control path; TODO confirm hw requirement */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5406
22c775ce 5407static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6f2043ce 5408{
22c775ce
AD
5409 u32 data, orig, tmp = 0;
5410
473359bc
AD
5411 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5412 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5413 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5414 orig = data = RREG32(CP_MEM_SLP_CNTL);
5415 data |= CP_MEM_LS_EN;
5416 if (orig != data)
5417 WREG32(CP_MEM_SLP_CNTL, data);
5418 }
5419 }
cc066715 5420
22c775ce
AD
5421 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5422 data &= 0xfffffffd;
5423 if (orig != data)
5424 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5425
5426 tmp = cik_halt_rlc(rdev);
5427
5428 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5429 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5430 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5431 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5432 WREG32(RLC_SERDES_WR_CTRL, data);
5433
5434 cik_update_rlc(rdev, tmp);
5435
473359bc
AD
5436 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5437 orig = data = RREG32(CGTS_SM_CTRL_REG);
5438 data &= ~SM_MODE_MASK;
5439 data |= SM_MODE(0x2);
5440 data |= SM_MODE_ENABLE;
5441 data &= ~CGTS_OVERRIDE;
5442 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5443 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5444 data &= ~CGTS_LS_OVERRIDE;
5445 data &= ~ON_MONITOR_ADD_MASK;
5446 data |= ON_MONITOR_ADD_EN;
5447 data |= ON_MONITOR_ADD(0x96);
5448 if (orig != data)
5449 WREG32(CGTS_SM_CTRL_REG, data);
5450 }
22c775ce
AD
5451 } else {
5452 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5453 data |= 0x00000002;
5454 if (orig != data)
5455 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5456
5457 data = RREG32(RLC_MEM_SLP_CNTL);
5458 if (data & RLC_MEM_LS_EN) {
5459 data &= ~RLC_MEM_LS_EN;
5460 WREG32(RLC_MEM_SLP_CNTL, data);
5461 }
6f2043ce 5462
22c775ce
AD
5463 data = RREG32(CP_MEM_SLP_CNTL);
5464 if (data & CP_MEM_LS_EN) {
5465 data &= ~CP_MEM_LS_EN;
5466 WREG32(CP_MEM_SLP_CNTL, data);
5467 }
cc066715 5468
22c775ce
AD
5469 orig = data = RREG32(CGTS_SM_CTRL_REG);
5470 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5471 if (orig != data)
5472 WREG32(CGTS_SM_CTRL_REG, data);
cc066715 5473
22c775ce 5474 tmp = cik_halt_rlc(rdev);
cc066715 5475
22c775ce
AD
5476 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5477 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5478 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5479 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5480 WREG32(RLC_SERDES_WR_CTRL, data);
cc066715 5481
22c775ce 5482 cik_update_rlc(rdev, tmp);
cc066715 5483 }
6f2043ce 5484}
1c49165d 5485
/* Memory-controller registers that carry clock-gating / light-sleep
 * enable bits; toggled as a group by cik_enable_mc_ls() and
 * cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
21a93e13 5498
22c775ce
AD
5499static void cik_enable_mc_ls(struct radeon_device *rdev,
5500 bool enable)
1c49165d 5501{
22c775ce
AD
5502 int i;
5503 u32 orig, data;
1c49165d 5504
22c775ce
AD
5505 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5506 orig = data = RREG32(mc_cg_registers[i]);
473359bc 5507 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
22c775ce
AD
5508 data |= MC_LS_ENABLE;
5509 else
5510 data &= ~MC_LS_ENABLE;
5511 if (data != orig)
5512 WREG32(mc_cg_registers[i], data);
1c49165d 5513 }
22c775ce 5514}
1c49165d 5515
22c775ce
AD
5516static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5517 bool enable)
5518{
5519 int i;
5520 u32 orig, data;
5521
5522 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5523 orig = data = RREG32(mc_cg_registers[i]);
473359bc 5524 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
22c775ce
AD
5525 data |= MC_CG_ENABLE;
5526 else
5527 data &= ~MC_CG_ENABLE;
5528 if (data != orig)
5529 WREG32(mc_cg_registers[i], data);
1c49165d 5530 }
1c49165d
AD
5531}
5532
22c775ce
AD
/* Enable/disable medium-grain clock gating for both SDMA engines,
 * gated on the SDMA_MGCG cg flag.
 */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		/* 0x100 enables gating — presumably a single override bit;
		 * TODO confirm against the SDMA register spec */
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		/* setting the top byte disables gating — TODO confirm */
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}
5553
22c775ce
AD
/* Enable/disable memory light sleep (bit 0x100 of SDMA0_POWER_CNTL)
 * for both SDMA engines, gated on the SDMA_LS cg flag.
 */
static void cik_enable_sdma_mgls(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	} else {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	}
}
5581
22c775ce
AD
/* Enable/disable medium-grain clock gating for the UVD block,
 * gated on the UVD_MGCG cg flag.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): this read is immediately overwritten by
		 * data = 0xfff (all gating bits set) — the read may only
		 * matter for hw side effects; confirm before removing */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
1c49165d 5607
473359bc
AD
5608static void cik_enable_bif_mgls(struct radeon_device *rdev,
5609 bool enable)
5610{
5611 u32 orig, data;
1c49165d 5612
473359bc 5613 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
1c49165d 5614
473359bc
AD
5615 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5616 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5617 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5618 else
5619 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5620 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
1c49165d 5621
473359bc
AD
5622 if (orig != data)
5623 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5624}
1c49165d 5625
22c775ce
AD
5626static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5627 bool enable)
5628{
5629 u32 orig, data;
1c49165d 5630
22c775ce 5631 orig = data = RREG32(HDP_HOST_PATH_CNTL);
1c49165d 5632
473359bc 5633 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
22c775ce
AD
5634 data &= ~CLOCK_GATING_DIS;
5635 else
5636 data |= CLOCK_GATING_DIS;
5637
5638 if (orig != data)
5639 WREG32(HDP_HOST_PATH_CNTL, data);
1c49165d
AD
5640}
5641
22c775ce
AD
5642static void cik_enable_hdp_ls(struct radeon_device *rdev,
5643 bool enable)
1c49165d 5644{
22c775ce
AD
5645 u32 orig, data;
5646
5647 orig = data = RREG32(HDP_MEM_POWER_LS);
5648
473359bc 5649 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
22c775ce
AD
5650 data |= HDP_LS_ENABLE;
5651 else
5652 data &= ~HDP_LS_ENABLE;
5653
5654 if (orig != data)
5655 WREG32(HDP_MEM_POWER_LS, data);
5656}
5657
/* Dispatch clock-gating enable/disable to the per-IP-block helpers.
 * @block is a bitmask of RADEON_CG_BLOCK_* values; the helpers
 * themselves re-check the asic cg_flags.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* quiesce gui-idle interrupts around the gfx CG change */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating only applies to discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5701
22c775ce 5702static void cik_init_cg(struct radeon_device *rdev)
1c49165d 5703{
22c775ce 5704
ddc76ff6 5705 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
22c775ce
AD
5706
5707 if (rdev->has_uvd)
5708 si_init_uvd_internal_cg(rdev);
5709
5710 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5711 RADEON_CG_BLOCK_SDMA |
473359bc 5712 RADEON_CG_BLOCK_BIF |
22c775ce
AD
5713 RADEON_CG_BLOCK_UVD |
5714 RADEON_CG_BLOCK_HDP), true);
1c49165d
AD
5715}
5716
473359bc 5717static void cik_fini_cg(struct radeon_device *rdev)
1c49165d 5718{
473359bc
AD
5719 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5720 RADEON_CG_BLOCK_SDMA |
5721 RADEON_CG_BLOCK_BIF |
5722 RADEON_CG_BLOCK_UVD |
5723 RADEON_CG_BLOCK_HDP), false);
5724
5725 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
1c49165d
AD
5726}
5727
22c775ce
AD
5728static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5729 bool enable)
1c49165d 5730{
22c775ce 5731 u32 data, orig;
1c49165d 5732
22c775ce 5733 orig = data = RREG32(RLC_PG_CNTL);
473359bc 5734 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
22c775ce
AD
5735 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5736 else
5737 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5738 if (orig != data)
5739 WREG32(RLC_PG_CNTL, data);
1c49165d
AD
5740}
5741
22c775ce
AD
5742static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5743 bool enable)
1c49165d 5744{
22c775ce
AD
5745 u32 data, orig;
5746
5747 orig = data = RREG32(RLC_PG_CNTL);
473359bc 5748 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
22c775ce
AD
5749 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5750 else
5751 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5752 if (orig != data)
5753 WREG32(RLC_PG_CNTL, data);
1c49165d
AD
5754}
5755
22c775ce 5756static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
3ec7d11b 5757{
22c775ce 5758 u32 data, orig;
3ec7d11b 5759
22c775ce 5760 orig = data = RREG32(RLC_PG_CNTL);
473359bc 5761 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
22c775ce
AD
5762 data &= ~DISABLE_CP_PG;
5763 else
5764 data |= DISABLE_CP_PG;
5765 if (orig != data)
5766 WREG32(RLC_PG_CNTL, data);
3ec7d11b
AD
5767}
5768
22c775ce 5769static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
f96ab484 5770{
22c775ce 5771 u32 data, orig;
f96ab484 5772
22c775ce 5773 orig = data = RREG32(RLC_PG_CNTL);
473359bc 5774 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
22c775ce
AD
5775 data &= ~DISABLE_GDS_PG;
5776 else
5777 data |= DISABLE_GDS_PG;
5778 if (orig != data)
5779 WREG32(RLC_PG_CNTL, data);
5780}
5781
/* per-ME register table layout inside the CP ucode images (dwords) */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the CP power-gating register tables out of the CE/PFP/ME/MEC
 * ucode images into the RLC's cp_table BO.  Kaveri has a fifth ME
 * (second MEC); the MEC images carry their table at a different
 * offset than the gfx MEs.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		/* ucode is big-endian; BO contents are little-endian */
		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
f96ab484 5823
22c775ce
AD
/* Enable/disable gfx coarse-grain power gating: the GFX_PG_ENABLE bit
 * in RLC_PG_CNTL plus automatic power gating in RLC_AUTO_PG_CTRL,
 * gated on the GFX_PG pg flag.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): read result is discarded — presumably the
		 * read itself forces a state sync; confirm before removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
f96ab484 5853
22c775ce
AD
/* Return a bitmap of the active (non-disabled) CUs for the given
 * SE/SH: disabled-CU bits are gathered from the fuse and user config
 * registers, inverted, and masked to max_cu_per_sh bits.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	/* target the requested SE/SH, then restore broadcast */
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* disabled-CU bits live in the upper half of the CC register */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of max_cu_per_sh low bits */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	/* invert: 1 = active CU */
	return (~tmp) & mask;
}
5876
22c775ce 5877static void cik_init_ao_cu_mask(struct radeon_device *rdev)
d0e092d9 5878{
22c775ce
AD
5879 u32 i, j, k, active_cu_number = 0;
5880 u32 mask, counter, cu_bitmap;
5881 u32 tmp = 0;
d0e092d9 5882
22c775ce
AD
5883 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5884 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5885 mask = 1;
5886 cu_bitmap = 0;
5887 counter = 0;
5888 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5889 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5890 if (counter < 2)
5891 cu_bitmap |= mask;
5892 counter ++;
d0e092d9 5893 }
22c775ce 5894 mask <<= 1;
d0e092d9 5895 }
d0e092d9 5896
22c775ce
AD
5897 active_cu_number += counter;
5898 tmp |= (cu_bitmap << (i * 16 + j * 8));
d0e092d9 5899 }
d0e092d9 5900 }
22c775ce
AD
5901
5902 WREG32(RLC_PG_AO_CU_MASK, tmp);
5903
5904 tmp = RREG32(RLC_MAX_PG_CU);
5905 tmp &= ~MAX_PU_CU_MASK;
5906 tmp |= MAX_PU_CU(active_cu_number);
5907 WREG32(RLC_MAX_PG_CU, tmp);
d0e092d9
AD
5908}
5909
22c775ce
AD
5910static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5911 bool enable)
605de6b9 5912{
22c775ce 5913 u32 data, orig;
605de6b9 5914
22c775ce 5915 orig = data = RREG32(RLC_PG_CNTL);
473359bc 5916 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
22c775ce
AD
5917 data |= STATIC_PER_CU_PG_ENABLE;
5918 else
5919 data &= ~STATIC_PER_CU_PG_ENABLE;
5920 if (orig != data)
5921 WREG32(RLC_PG_CNTL, data);
5922}
5923
5924static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5925 bool enable)
5926{
5927 u32 data, orig;
605de6b9 5928
22c775ce 5929 orig = data = RREG32(RLC_PG_CNTL);
473359bc 5930 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
22c775ce 5931 data |= DYN_PER_CU_PG_ENABLE;
605de6b9 5932 else
22c775ce
AD
5933 data &= ~DYN_PER_CU_PG_ENABLE;
5934 if (orig != data)
5935 WREG32(RLC_PG_CNTL, data);
5936}
605de6b9 5937
22c775ce
AD
5938#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5939#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
5940
/**
 * cik_init_gfx_cgpg - set up the RLC for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Programs the RLC scratch area with the clear state descriptor and the
 * save/restore register list, enables GFX_PG_SRC, and configures the
 * powergating delay/poll parameters used by the RLC (CIK).
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear state descriptor (hi addr, lo addr, size)
		 * into the RLC scratch area */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state buffer: zero out the three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	/* let the RLC source gfx powergating; only write back on change */
	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* both bases are 256-byte aligned GPU addresses */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	/* set the CP idle poll interval */
	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	/* only the low byte of RLC_PG_DELAY_2 is (re)programmed */
	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
5989
/**
 * cik_update_gfx_pg - enable/disable all gfx powergating variants
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Toggles CGPG and the static/dynamic MGPG helpers together (CIK).
 * The MGPG helpers check the relevant rdev->pg_flags bits themselves.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
f6796cae 5996
a0f38609
AD
5997u32 cik_get_csb_size(struct radeon_device *rdev)
5998{
5999 u32 count = 0;
6000 const struct cs_section_def *sect = NULL;
6001 const struct cs_extent_def *ext = NULL;
f6796cae 6002
a0f38609
AD
6003 if (rdev->rlc.cs_data == NULL)
6004 return 0;
f6796cae 6005
a0f38609
AD
6006 /* begin clear state */
6007 count += 2;
6008 /* context control state */
6009 count += 3;
6010
6011 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6012 for (ext = sect->section; ext->extent != NULL; ++ext) {
6013 if (sect->id == SECT_CONTEXT)
6014 count += 2 + ext->reg_count;
6015 else
6016 return 0;
f6796cae
AD
6017 }
6018 }
a0f38609
AD
6019 /* pa_sc_raster_config/pa_sc_raster_config1 */
6020 count += 4;
6021 /* end clear state */
6022 count += 2;
6023 /* clear state */
6024 count += 2;
f6796cae 6025
a0f38609 6026 return count;
f6796cae
AD
6027}
6028
/**
 * cik_get_csb_buffer - fill a buffer with the clear state IB contents
 *
 * @rdev: radeon_device pointer
 * @buffer: destination; must hold at least cik_get_csb_size() dwords
 *
 * Emits the clear state packet stream (preamble, context control,
 * context register sections, raster config, clear state) into @buffer
 * in little-endian dword order.  Returns early without writing anything
 * if there is no clear state data or @buffer is NULL, and aborts midway
 * if a non-context section is encountered.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				/* register offsets are relative to the
				 * context register space base (0xa000) */
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1, per asic family */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
f6796cae 6088
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * Enable the powergating features selected by rdev->pg_flags (CIK).
 * Does nothing when no powergating flags are set.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			/* gfx powergating needs the RLC set up first */
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
f6796cae 6103
473359bc
AD
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disable the powergating features that were enabled at init time,
 * mirroring cik_init_pg() in reverse order (CIK).
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
a59781bb
AD
6114
6115/*
6116 * Interrupts
6117 * Starting with r6xx, interrupts are handled via a ring buffer.
6118 * Ring buffers are areas of GPU accessible memory that the GPU
6119 * writes interrupt vectors into and the host reads vectors out of.
6120 * There is a rptr (read pointer) that determines where the
6121 * host is currently reading, and a wptr (write pointer)
6122 * which determines where the GPU has written. When the
6123 * pointers are equal, the ring is idle. When the GPU
6124 * writes vectors to the ring buffer, it increments the
6125 * wptr. When there is an interrupt, the host then starts
6126 * fetching commands and processing them until the pointers are
6127 * equal again at which point it updates the rptr.
6128 */
6129
6130/**
6131 * cik_enable_interrupts - Enable the interrupt ring buffer
6132 *
6133 * @rdev: radeon_device pointer
6134 *
6135 * Enable the interrupt ring buffer (CIK).
6136 */
6137static void cik_enable_interrupts(struct radeon_device *rdev)
6138{
6139 u32 ih_cntl = RREG32(IH_CNTL);
6140 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6141
6142 ih_cntl |= ENABLE_INTR;
6143 ih_rb_cntl |= IH_RB_ENABLE;
6144 WREG32(IH_CNTL, ih_cntl);
6145 WREG32(IH_RB_CNTL, ih_rb_cntl);
6146 rdev->ih.enabled = true;
6147}
6148
6149/**
6150 * cik_disable_interrupts - Disable the interrupt ring buffer
6151 *
6152 * @rdev: radeon_device pointer
6153 *
6154 * Disable the interrupt ring buffer (CIK).
6155 */
6156static void cik_disable_interrupts(struct radeon_device *rdev)
6157{
6158 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6159 u32 ih_cntl = RREG32(IH_CNTL);
6160
6161 ih_rb_cntl &= ~IH_RB_ENABLE;
6162 ih_cntl &= ~ENABLE_INTR;
6163 WREG32(IH_RB_CNTL, ih_rb_cntl);
6164 WREG32(IH_CNTL, ih_cntl);
6165 /* set rptr, wptr to 0 */
6166 WREG32(IH_RB_RPTR, 0);
6167 WREG32(IH_RB_WPTR, 0);
6168 rdev->ih.enabled = false;
6169 rdev->ih.rptr = 0;
6170}
6171
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring - keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma - drop the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug - preserve the polarity bit, clear everything else */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6233
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6315
6316/**
6317 * cik_irq_set - enable/disable interrupt sources
6318 *
6319 * @rdev: radeon_device pointer
6320 *
6321 * Enable interrupt sources on the GPU (vblanks, hpd,
6322 * etc.) (CIK).
6323 * Returns 0 for success, errors for failure.
6324 */
6325int cik_irq_set(struct radeon_device *rdev)
6326{
4214faf6 6327 u32 cp_int_cntl;
2b0781a6
AD
6328 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6329 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
a59781bb
AD
6330 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6331 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6332 u32 grbm_int_cntl = 0;
21a93e13 6333 u32 dma_cntl, dma_cntl1;
41a524ab 6334 u32 thermal_int;
a59781bb
AD
6335
6336 if (!rdev->irq.installed) {
6337 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6338 return -EINVAL;
6339 }
6340 /* don't enable anything if the ih is disabled */
6341 if (!rdev->ih.enabled) {
6342 cik_disable_interrupts(rdev);
6343 /* force the active interrupt state to all disabled */
6344 cik_disable_interrupt_state(rdev);
6345 return 0;
6346 }
6347
4214faf6
AD
6348 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6349 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6350 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6351
a59781bb
AD
6352 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6353 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6354 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6355 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6356 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6357 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6358
21a93e13
AD
6359 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6360 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6361
2b0781a6
AD
6362 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6363 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6364 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6365 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6366 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6367 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6368 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6369 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6370
cc8dbbb4
AD
6371 if (rdev->flags & RADEON_IS_IGP)
6372 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6373 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6374 else
6375 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6376 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
41a524ab 6377
a59781bb
AD
6378 /* enable CP interrupts on all rings */
6379 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6380 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6381 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6382 }
2b0781a6
AD
6383 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6384 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6385 DRM_DEBUG("si_irq_set: sw int cp1\n");
6386 if (ring->me == 1) {
6387 switch (ring->pipe) {
6388 case 0:
6389 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6390 break;
6391 case 1:
6392 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6393 break;
6394 case 2:
6395 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6396 break;
6397 case 3:
6398 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6399 break;
6400 default:
6401 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6402 break;
6403 }
6404 } else if (ring->me == 2) {
6405 switch (ring->pipe) {
6406 case 0:
6407 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6408 break;
6409 case 1:
6410 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6411 break;
6412 case 2:
6413 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6414 break;
6415 case 3:
6416 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6417 break;
6418 default:
6419 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6420 break;
6421 }
6422 } else {
6423 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6424 }
6425 }
6426 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6427 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6428 DRM_DEBUG("si_irq_set: sw int cp2\n");
6429 if (ring->me == 1) {
6430 switch (ring->pipe) {
6431 case 0:
6432 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6433 break;
6434 case 1:
6435 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6436 break;
6437 case 2:
6438 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6439 break;
6440 case 3:
6441 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6442 break;
6443 default:
6444 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6445 break;
6446 }
6447 } else if (ring->me == 2) {
6448 switch (ring->pipe) {
6449 case 0:
6450 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6451 break;
6452 case 1:
6453 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6454 break;
6455 case 2:
6456 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6457 break;
6458 case 3:
6459 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6460 break;
6461 default:
6462 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6463 break;
6464 }
6465 } else {
6466 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6467 }
6468 }
a59781bb 6469
21a93e13
AD
6470 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6471 DRM_DEBUG("cik_irq_set: sw int dma\n");
6472 dma_cntl |= TRAP_ENABLE;
6473 }
6474
6475 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6476 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6477 dma_cntl1 |= TRAP_ENABLE;
6478 }
6479
a59781bb
AD
6480 if (rdev->irq.crtc_vblank_int[0] ||
6481 atomic_read(&rdev->irq.pflip[0])) {
6482 DRM_DEBUG("cik_irq_set: vblank 0\n");
6483 crtc1 |= VBLANK_INTERRUPT_MASK;
6484 }
6485 if (rdev->irq.crtc_vblank_int[1] ||
6486 atomic_read(&rdev->irq.pflip[1])) {
6487 DRM_DEBUG("cik_irq_set: vblank 1\n");
6488 crtc2 |= VBLANK_INTERRUPT_MASK;
6489 }
6490 if (rdev->irq.crtc_vblank_int[2] ||
6491 atomic_read(&rdev->irq.pflip[2])) {
6492 DRM_DEBUG("cik_irq_set: vblank 2\n");
6493 crtc3 |= VBLANK_INTERRUPT_MASK;
6494 }
6495 if (rdev->irq.crtc_vblank_int[3] ||
6496 atomic_read(&rdev->irq.pflip[3])) {
6497 DRM_DEBUG("cik_irq_set: vblank 3\n");
6498 crtc4 |= VBLANK_INTERRUPT_MASK;
6499 }
6500 if (rdev->irq.crtc_vblank_int[4] ||
6501 atomic_read(&rdev->irq.pflip[4])) {
6502 DRM_DEBUG("cik_irq_set: vblank 4\n");
6503 crtc5 |= VBLANK_INTERRUPT_MASK;
6504 }
6505 if (rdev->irq.crtc_vblank_int[5] ||
6506 atomic_read(&rdev->irq.pflip[5])) {
6507 DRM_DEBUG("cik_irq_set: vblank 5\n");
6508 crtc6 |= VBLANK_INTERRUPT_MASK;
6509 }
6510 if (rdev->irq.hpd[0]) {
6511 DRM_DEBUG("cik_irq_set: hpd 1\n");
6512 hpd1 |= DC_HPDx_INT_EN;
6513 }
6514 if (rdev->irq.hpd[1]) {
6515 DRM_DEBUG("cik_irq_set: hpd 2\n");
6516 hpd2 |= DC_HPDx_INT_EN;
6517 }
6518 if (rdev->irq.hpd[2]) {
6519 DRM_DEBUG("cik_irq_set: hpd 3\n");
6520 hpd3 |= DC_HPDx_INT_EN;
6521 }
6522 if (rdev->irq.hpd[3]) {
6523 DRM_DEBUG("cik_irq_set: hpd 4\n");
6524 hpd4 |= DC_HPDx_INT_EN;
6525 }
6526 if (rdev->irq.hpd[4]) {
6527 DRM_DEBUG("cik_irq_set: hpd 5\n");
6528 hpd5 |= DC_HPDx_INT_EN;
6529 }
6530 if (rdev->irq.hpd[5]) {
6531 DRM_DEBUG("cik_irq_set: hpd 6\n");
6532 hpd6 |= DC_HPDx_INT_EN;
6533 }
6534
41a524ab
AD
6535 if (rdev->irq.dpm_thermal) {
6536 DRM_DEBUG("dpm thermal\n");
cc8dbbb4
AD
6537 if (rdev->flags & RADEON_IS_IGP)
6538 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6539 else
6540 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
41a524ab
AD
6541 }
6542
a59781bb
AD
6543 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6544
21a93e13
AD
6545 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6546 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6547
2b0781a6
AD
6548 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6549 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6550 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6551 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6552 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6553 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6554 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6555 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6556
a59781bb
AD
6557 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6558
6559 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6560 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6561 if (rdev->num_crtc >= 4) {
6562 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6563 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6564 }
6565 if (rdev->num_crtc >= 6) {
6566 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6567 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6568 }
6569
6570 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6571 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6572 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6573 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6574 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6575 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6576
cc8dbbb4
AD
6577 if (rdev->flags & RADEON_IS_IGP)
6578 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6579 else
6580 WREG32_SMC(CG_THERMAL_INT, thermal_int);
41a524ab 6581
a59781bb
AD
6582 return 0;
6583}
6584
6585/**
6586 * cik_irq_ack - ack interrupt sources
6587 *
6588 * @rdev: radeon_device pointer
6589 *
6590 * Ack interrupt sources on the GPU (vblanks, hpd,
6591 * etc.) (CIK). Certain interrupts sources are sw
6592 * generated and do not require an explicit ack.
6593 */
6594static inline void cik_irq_ack(struct radeon_device *rdev)
6595{
6596 u32 tmp;
6597
6598 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6599 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6600 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6601 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6602 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6603 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6604 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6605
6606 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6607 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6608 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6609 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6610 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6611 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6612 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6613 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6614
6615 if (rdev->num_crtc >= 4) {
6616 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6617 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6618 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6619 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6620 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6621 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6622 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6623 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6624 }
6625
6626 if (rdev->num_crtc >= 6) {
6627 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6628 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6629 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6630 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6631 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6632 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6633 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6634 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6635 }
6636
6637 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6638 tmp = RREG32(DC_HPD1_INT_CONTROL);
6639 tmp |= DC_HPDx_INT_ACK;
6640 WREG32(DC_HPD1_INT_CONTROL, tmp);
6641 }
6642 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6643 tmp = RREG32(DC_HPD2_INT_CONTROL);
6644 tmp |= DC_HPDx_INT_ACK;
6645 WREG32(DC_HPD2_INT_CONTROL, tmp);
6646 }
6647 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6648 tmp = RREG32(DC_HPD3_INT_CONTROL);
6649 tmp |= DC_HPDx_INT_ACK;
6650 WREG32(DC_HPD3_INT_CONTROL, tmp);
6651 }
6652 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6653 tmp = RREG32(DC_HPD4_INT_CONTROL);
6654 tmp |= DC_HPDx_INT_ACK;
6655 WREG32(DC_HPD4_INT_CONTROL, tmp);
6656 }
6657 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6658 tmp = RREG32(DC_HPD5_INT_CONTROL);
6659 tmp |= DC_HPDx_INT_ACK;
6660 WREG32(DC_HPD5_INT_CONTROL, tmp);
6661 }
6662 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6663 tmp = RREG32(DC_HPD5_INT_CONTROL);
6664 tmp |= DC_HPDx_INT_ACK;
6665 WREG32(DC_HPD6_INT_CONTROL, tmp);
6666 }
6667}
6668
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then mask every individual source */
	cik_disable_interrupt_state(rdev);
}
6684
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	/* RLC is restarted via cik_rlc_resume() in cik_irq_init() on resume */
	cik_rlc_stop(rdev);
}
6698
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6713
6714/**
6715 * cik_get_ih_wptr - get the IH ring buffer wptr
6716 *
6717 * @rdev: radeon_device pointer
6718 *
6719 * Get the IH ring buffer wptr from either the register
6720 * or the writeback memory buffer (CIK). Also check for
6721 * ring buffer overflow and deal with it.
6722 * Used by cik_irq_process().
6723 * Returns the value of the wptr.
6724 */
6725static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6726{
6727 u32 wptr, tmp;
6728
6729 if (rdev->wb.enabled)
6730 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6731 else
6732 wptr = RREG32(IH_RB_WPTR);
6733
6734 if (wptr & RB_OVERFLOW) {
6735 /* When a ring buffer overflow happen start parsing interrupt
6736 * from the last not overwritten vector (wptr + 16). Hopefully
6737 * this should allow us to catchup.
6738 */
6739 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6740 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6741 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6742 tmp = RREG32(IH_RB_CNTL);
6743 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6744 WREG32(IH_RB_CNTL, tmp);
6745 }
6746 return (wptr & rdev->ih.ptr_mask);
6747}
6748
6749/* CIK IV Ring
6750 * Each IV ring entry is 128 bits:
6751 * [7:0] - interrupt source id
6752 * [31:8] - reserved
6753 * [59:32] - interrupt source data
6754 * [63:60] - reserved
21a93e13
AD
6755 * [71:64] - RINGID
6756 * CP:
6757 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
a59781bb
AD
6758 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6759 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6760 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6761 * PIPE_ID - ME0 0=3D
6762 * - ME1&2 compute dispatcher (4 pipes each)
21a93e13
AD
6763 * SDMA:
6764 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6765 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6766 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
a59781bb
AD
6767 * [79:72] - VMID
6768 * [95:80] - PASID
6769 * [127:96] - reserved
6770 */
6771/**
6772 * cik_irq_process - interrupt handler
6773 *
6774 * @rdev: radeon_device pointer
6775 *
6776 * Interrupt hander (CIK). Walk the IH ring,
6777 * ack interrupts and schedule work to handle
6778 * interrupt events.
6779 * Returns irq process return code.
6780 */
6781int cik_irq_process(struct radeon_device *rdev)
6782{
2b0781a6
AD
6783 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6784 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
a59781bb
AD
6785 u32 wptr;
6786 u32 rptr;
6787 u32 src_id, src_data, ring_id;
6788 u8 me_id, pipe_id, queue_id;
6789 u32 ring_index;
6790 bool queue_hotplug = false;
6791 bool queue_reset = false;
3ec7d11b 6792 u32 addr, status, mc_client;
41a524ab 6793 bool queue_thermal = false;
a59781bb
AD
6794
6795 if (!rdev->ih.enabled || rdev->shutdown)
6796 return IRQ_NONE;
6797
6798 wptr = cik_get_ih_wptr(rdev);
6799
6800restart_ih:
6801 /* is somebody else already processing irqs? */
6802 if (atomic_xchg(&rdev->ih.lock, 1))
6803 return IRQ_NONE;
6804
6805 rptr = rdev->ih.rptr;
6806 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6807
6808 /* Order reading of wptr vs. reading of IH ring data */
6809 rmb();
6810
6811 /* display interrupts */
6812 cik_irq_ack(rdev);
6813
6814 while (rptr != wptr) {
6815 /* wptr/rptr are in bytes! */
6816 ring_index = rptr / 4;
6817 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6818 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6819 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
a59781bb
AD
6820
6821 switch (src_id) {
6822 case 1: /* D1 vblank/vline */
6823 switch (src_data) {
6824 case 0: /* D1 vblank */
6825 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6826 if (rdev->irq.crtc_vblank_int[0]) {
6827 drm_handle_vblank(rdev->ddev, 0);
6828 rdev->pm.vblank_sync = true;
6829 wake_up(&rdev->irq.vblank_queue);
6830 }
6831 if (atomic_read(&rdev->irq.pflip[0]))
6832 radeon_crtc_handle_flip(rdev, 0);
6833 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6834 DRM_DEBUG("IH: D1 vblank\n");
6835 }
6836 break;
6837 case 1: /* D1 vline */
6838 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6839 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6840 DRM_DEBUG("IH: D1 vline\n");
6841 }
6842 break;
6843 default:
6844 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6845 break;
6846 }
6847 break;
6848 case 2: /* D2 vblank/vline */
6849 switch (src_data) {
6850 case 0: /* D2 vblank */
6851 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6852 if (rdev->irq.crtc_vblank_int[1]) {
6853 drm_handle_vblank(rdev->ddev, 1);
6854 rdev->pm.vblank_sync = true;
6855 wake_up(&rdev->irq.vblank_queue);
6856 }
6857 if (atomic_read(&rdev->irq.pflip[1]))
6858 radeon_crtc_handle_flip(rdev, 1);
6859 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6860 DRM_DEBUG("IH: D2 vblank\n");
6861 }
6862 break;
6863 case 1: /* D2 vline */
6864 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6865 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6866 DRM_DEBUG("IH: D2 vline\n");
6867 }
6868 break;
6869 default:
6870 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6871 break;
6872 }
6873 break;
6874 case 3: /* D3 vblank/vline */
6875 switch (src_data) {
6876 case 0: /* D3 vblank */
6877 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6878 if (rdev->irq.crtc_vblank_int[2]) {
6879 drm_handle_vblank(rdev->ddev, 2);
6880 rdev->pm.vblank_sync = true;
6881 wake_up(&rdev->irq.vblank_queue);
6882 }
6883 if (atomic_read(&rdev->irq.pflip[2]))
6884 radeon_crtc_handle_flip(rdev, 2);
6885 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6886 DRM_DEBUG("IH: D3 vblank\n");
6887 }
6888 break;
6889 case 1: /* D3 vline */
6890 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6891 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6892 DRM_DEBUG("IH: D3 vline\n");
6893 }
6894 break;
6895 default:
6896 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6897 break;
6898 }
6899 break;
6900 case 4: /* D4 vblank/vline */
6901 switch (src_data) {
6902 case 0: /* D4 vblank */
6903 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6904 if (rdev->irq.crtc_vblank_int[3]) {
6905 drm_handle_vblank(rdev->ddev, 3);
6906 rdev->pm.vblank_sync = true;
6907 wake_up(&rdev->irq.vblank_queue);
6908 }
6909 if (atomic_read(&rdev->irq.pflip[3]))
6910 radeon_crtc_handle_flip(rdev, 3);
6911 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6912 DRM_DEBUG("IH: D4 vblank\n");
6913 }
6914 break;
6915 case 1: /* D4 vline */
6916 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6917 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6918 DRM_DEBUG("IH: D4 vline\n");
6919 }
6920 break;
6921 default:
6922 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6923 break;
6924 }
6925 break;
6926 case 5: /* D5 vblank/vline */
6927 switch (src_data) {
6928 case 0: /* D5 vblank */
6929 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6930 if (rdev->irq.crtc_vblank_int[4]) {
6931 drm_handle_vblank(rdev->ddev, 4);
6932 rdev->pm.vblank_sync = true;
6933 wake_up(&rdev->irq.vblank_queue);
6934 }
6935 if (atomic_read(&rdev->irq.pflip[4]))
6936 radeon_crtc_handle_flip(rdev, 4);
6937 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6938 DRM_DEBUG("IH: D5 vblank\n");
6939 }
6940 break;
6941 case 1: /* D5 vline */
6942 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6943 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6944 DRM_DEBUG("IH: D5 vline\n");
6945 }
6946 break;
6947 default:
6948 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6949 break;
6950 }
6951 break;
6952 case 6: /* D6 vblank/vline */
6953 switch (src_data) {
6954 case 0: /* D6 vblank */
6955 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6956 if (rdev->irq.crtc_vblank_int[5]) {
6957 drm_handle_vblank(rdev->ddev, 5);
6958 rdev->pm.vblank_sync = true;
6959 wake_up(&rdev->irq.vblank_queue);
6960 }
6961 if (atomic_read(&rdev->irq.pflip[5]))
6962 radeon_crtc_handle_flip(rdev, 5);
6963 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6964 DRM_DEBUG("IH: D6 vblank\n");
6965 }
6966 break;
6967 case 1: /* D6 vline */
6968 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6969 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6970 DRM_DEBUG("IH: D6 vline\n");
6971 }
6972 break;
6973 default:
6974 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6975 break;
6976 }
6977 break;
6978 case 42: /* HPD hotplug */
6979 switch (src_data) {
6980 case 0:
6981 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6982 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6983 queue_hotplug = true;
6984 DRM_DEBUG("IH: HPD1\n");
6985 }
6986 break;
6987 case 1:
6988 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6989 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6990 queue_hotplug = true;
6991 DRM_DEBUG("IH: HPD2\n");
6992 }
6993 break;
6994 case 2:
6995 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6996 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6997 queue_hotplug = true;
6998 DRM_DEBUG("IH: HPD3\n");
6999 }
7000 break;
7001 case 3:
7002 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7003 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7004 queue_hotplug = true;
7005 DRM_DEBUG("IH: HPD4\n");
7006 }
7007 break;
7008 case 4:
7009 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7010 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7011 queue_hotplug = true;
7012 DRM_DEBUG("IH: HPD5\n");
7013 }
7014 break;
7015 case 5:
7016 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7017 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7018 queue_hotplug = true;
7019 DRM_DEBUG("IH: HPD6\n");
7020 }
7021 break;
7022 default:
7023 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7024 break;
7025 }
7026 break;
6a3808b8
CK
7027 case 124: /* UVD */
7028 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7029 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7030 break;
9d97c99b
AD
7031 case 146:
7032 case 147:
3ec7d11b
AD
7033 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7034 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7035 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
9d97c99b
AD
7036 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7037 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3ec7d11b 7038 addr);
9d97c99b 7039 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3ec7d11b
AD
7040 status);
7041 cik_vm_decode_fault(rdev, status, addr, mc_client);
9d97c99b
AD
7042 /* reset addr and status */
7043 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7044 break;
a59781bb
AD
7045 case 176: /* GFX RB CP_INT */
7046 case 177: /* GFX IB CP_INT */
7047 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7048 break;
7049 case 181: /* CP EOP event */
7050 DRM_DEBUG("IH: CP EOP\n");
21a93e13
AD
7051 /* XXX check the bitfield order! */
7052 me_id = (ring_id & 0x60) >> 5;
7053 pipe_id = (ring_id & 0x18) >> 3;
7054 queue_id = (ring_id & 0x7) >> 0;
a59781bb
AD
7055 switch (me_id) {
7056 case 0:
7057 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7058 break;
7059 case 1:
a59781bb 7060 case 2:
2b0781a6
AD
7061 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7062 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7063 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7064 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
a59781bb
AD
7065 break;
7066 }
7067 break;
7068 case 184: /* CP Privileged reg access */
7069 DRM_ERROR("Illegal register access in command stream\n");
7070 /* XXX check the bitfield order! */
7071 me_id = (ring_id & 0x60) >> 5;
7072 pipe_id = (ring_id & 0x18) >> 3;
7073 queue_id = (ring_id & 0x7) >> 0;
7074 switch (me_id) {
7075 case 0:
7076 /* This results in a full GPU reset, but all we need to do is soft
7077 * reset the CP for gfx
7078 */
7079 queue_reset = true;
7080 break;
7081 case 1:
7082 /* XXX compute */
2b0781a6 7083 queue_reset = true;
a59781bb
AD
7084 break;
7085 case 2:
7086 /* XXX compute */
2b0781a6 7087 queue_reset = true;
a59781bb
AD
7088 break;
7089 }
7090 break;
7091 case 185: /* CP Privileged inst */
7092 DRM_ERROR("Illegal instruction in command stream\n");
21a93e13
AD
7093 /* XXX check the bitfield order! */
7094 me_id = (ring_id & 0x60) >> 5;
7095 pipe_id = (ring_id & 0x18) >> 3;
7096 queue_id = (ring_id & 0x7) >> 0;
a59781bb
AD
7097 switch (me_id) {
7098 case 0:
7099 /* This results in a full GPU reset, but all we need to do is soft
7100 * reset the CP for gfx
7101 */
7102 queue_reset = true;
7103 break;
7104 case 1:
7105 /* XXX compute */
2b0781a6 7106 queue_reset = true;
a59781bb
AD
7107 break;
7108 case 2:
7109 /* XXX compute */
2b0781a6 7110 queue_reset = true;
a59781bb
AD
7111 break;
7112 }
7113 break;
21a93e13
AD
7114 case 224: /* SDMA trap event */
7115 /* XXX check the bitfield order! */
7116 me_id = (ring_id & 0x3) >> 0;
7117 queue_id = (ring_id & 0xc) >> 2;
7118 DRM_DEBUG("IH: SDMA trap\n");
7119 switch (me_id) {
7120 case 0:
7121 switch (queue_id) {
7122 case 0:
7123 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7124 break;
7125 case 1:
7126 /* XXX compute */
7127 break;
7128 case 2:
7129 /* XXX compute */
7130 break;
7131 }
7132 break;
7133 case 1:
7134 switch (queue_id) {
7135 case 0:
7136 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7137 break;
7138 case 1:
7139 /* XXX compute */
7140 break;
7141 case 2:
7142 /* XXX compute */
7143 break;
7144 }
7145 break;
7146 }
7147 break;
41a524ab
AD
7148 case 230: /* thermal low to high */
7149 DRM_DEBUG("IH: thermal low to high\n");
7150 rdev->pm.dpm.thermal.high_to_low = false;
7151 queue_thermal = true;
7152 break;
7153 case 231: /* thermal high to low */
7154 DRM_DEBUG("IH: thermal high to low\n");
7155 rdev->pm.dpm.thermal.high_to_low = true;
7156 queue_thermal = true;
7157 break;
7158 case 233: /* GUI IDLE */
7159 DRM_DEBUG("IH: GUI idle\n");
7160 break;
21a93e13
AD
7161 case 241: /* SDMA Privileged inst */
7162 case 247: /* SDMA Privileged inst */
7163 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7164 /* XXX check the bitfield order! */
7165 me_id = (ring_id & 0x3) >> 0;
7166 queue_id = (ring_id & 0xc) >> 2;
7167 switch (me_id) {
7168 case 0:
7169 switch (queue_id) {
7170 case 0:
7171 queue_reset = true;
7172 break;
7173 case 1:
7174 /* XXX compute */
7175 queue_reset = true;
7176 break;
7177 case 2:
7178 /* XXX compute */
7179 queue_reset = true;
7180 break;
7181 }
7182 break;
7183 case 1:
7184 switch (queue_id) {
7185 case 0:
7186 queue_reset = true;
7187 break;
7188 case 1:
7189 /* XXX compute */
7190 queue_reset = true;
7191 break;
7192 case 2:
7193 /* XXX compute */
7194 queue_reset = true;
7195 break;
7196 }
7197 break;
7198 }
7199 break;
a59781bb
AD
7200 default:
7201 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7202 break;
7203 }
7204
7205 /* wptr/rptr are in bytes! */
7206 rptr += 16;
7207 rptr &= rdev->ih.ptr_mask;
7208 }
7209 if (queue_hotplug)
7210 schedule_work(&rdev->hotplug_work);
7211 if (queue_reset)
7212 schedule_work(&rdev->reset_work);
41a524ab
AD
7213 if (queue_thermal)
7214 schedule_work(&rdev->pm.dpm.thermal.work);
a59781bb
AD
7215 rdev->ih.rptr = rptr;
7216 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7217 atomic_set(&rdev->ih.lock, 0);
7218
7219 /* make sure wptr hasn't changed while processing */
7220 wptr = cik_get_ih_wptr(rdev);
7221 if (wptr != rptr)
7222 goto restart_ih;
7223
7224 return IRQ_HANDLED;
7225}
7bf94a2c
AD
7226
7227/*
7228 * startup/shutdown callbacks
7229 */
/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * Returns 0 for success, error for failure.
 *
 * NOTE(review): the ordering below is deliberate — scratch before MC,
 * MC/ucode before GART, GART before GPU init, fences before IRQ,
 * rings before CP/SDMA resume.  Do not reorder without checking the
 * hardware init dependencies.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* Load ucode if not already present.  IGPs (Kaveri/Kabini) have no
	 * discrete MC, so only dGPUs require (and load) MC firmware.
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the save/restore list for the IGP variant;
		 * Kaveri is "spectre", Kabini is "kalindi".
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring we expose:
	 * gfx, two compute (CP1/CP2), two SDMA, and optionally UVD.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure we zero the ring size so the
	 * rest of the driver treats UVD as absent instead of erroring out.
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring: only initialized if the resume path above succeeded
	 * (ring_size was zeroed otherwise); failure here is non-fatal.
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7464
7465/**
7466 * cik_resume - resume the asic to a functional state
7467 *
7468 * @rdev: radeon_device pointer
7469 *
7470 * Programs the asic to a functional state (CIK).
7471 * Called at resume.
7472 * Returns 0 for success, error for failure.
7473 */
7474int cik_resume(struct radeon_device *rdev)
7475{
7476 int r;
7477
7478 /* post card */
7479 atom_asic_init(rdev->mode_info.atom_context);
7480
0aafd313
AD
7481 /* init golden registers */
7482 cik_init_golden_registers(rdev);
7483
7bf94a2c
AD
7484 rdev->accel_working = true;
7485 r = cik_startup(rdev);
7486 if (r) {
7487 DRM_ERROR("cik startup failed on resume\n");
7488 rdev->accel_working = false;
7489 return r;
7490 }
7491
7492 return r;
7493
7494}
7495
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 *
 * NOTE(review): teardown order matters — engines are halted (CP, SDMA,
 * UVD) before clock/power gating is disabled and before the IRQ,
 * writeback and GART state they depend on is torn down.
 */
int cik_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processors and DMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	/* disable powergating/clockgating before shutting down IRQs */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
7520
7521/* Plan is to move initialization in that function and use
7522 * helper function so that radeon_device_init pretty much
7523 * do nothing more than calling asic specific function. This
7524 * should also allow to remove a bunch of callback function
7525 * like vram_info.
7526 */
7527/**
7528 * cik_init - asic specific driver and hw init
7529 *
7530 * @rdev: radeon_device pointer
7531 *
7532 * Setup asic specific driver variables and program the hw
7533 * to a functional state (CIK).
7534 * Called at driver startup.
7535 * Returns 0 for success, errors for failure.
7536 */
7537int cik_init(struct radeon_device *rdev)
7538{
7539 struct radeon_ring *ring;
7540 int r;
7541
7542 /* Read BIOS */
7543 if (!radeon_get_bios(rdev)) {
7544 if (ASIC_IS_AVIVO(rdev))
7545 return -EINVAL;
7546 }
7547 /* Must be an ATOMBIOS */
7548 if (!rdev->is_atom_bios) {
7549 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7550 return -EINVAL;
7551 }
7552 r = radeon_atombios_init(rdev);
7553 if (r)
7554 return r;
7555
7556 /* Post card if necessary */
7557 if (!radeon_card_posted(rdev)) {
7558 if (!rdev->bios) {
7559 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7560 return -EINVAL;
7561 }
7562 DRM_INFO("GPU not posted. posting now...\n");
7563 atom_asic_init(rdev->mode_info.atom_context);
7564 }
0aafd313
AD
7565 /* init golden registers */
7566 cik_init_golden_registers(rdev);
7bf94a2c
AD
7567 /* Initialize scratch registers */
7568 cik_scratch_init(rdev);
7569 /* Initialize surface registers */
7570 radeon_surface_init(rdev);
7571 /* Initialize clocks */
7572 radeon_get_clock_info(rdev->ddev);
7573
7574 /* Fence driver */
7575 r = radeon_fence_driver_init(rdev);
7576 if (r)
7577 return r;
7578
7579 /* initialize memory controller */
7580 r = cik_mc_init(rdev);
7581 if (r)
7582 return r;
7583 /* Memory manager */
7584 r = radeon_bo_init(rdev);
7585 if (r)
7586 return r;
7587
7588 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7589 ring->ring_obj = NULL;
7590 r600_ring_init(rdev, ring, 1024 * 1024);
7591
963e81f9
AD
7592 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7593 ring->ring_obj = NULL;
7594 r600_ring_init(rdev, ring, 1024 * 1024);
7595 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7596 if (r)
7597 return r;
7598
7599 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7600 ring->ring_obj = NULL;
7601 r600_ring_init(rdev, ring, 1024 * 1024);
7602 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7603 if (r)
7604 return r;
7605
7bf94a2c
AD
7606 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7607 ring->ring_obj = NULL;
7608 r600_ring_init(rdev, ring, 256 * 1024);
7609
7610 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7611 ring->ring_obj = NULL;
7612 r600_ring_init(rdev, ring, 256 * 1024);
7613
87167bb1
CK
7614 r = radeon_uvd_init(rdev);
7615 if (!r) {
7616 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7617 ring->ring_obj = NULL;
7618 r600_ring_init(rdev, ring, 4096);
7619 }
7620
7bf94a2c
AD
7621 rdev->ih.ring_obj = NULL;
7622 r600_ih_ring_init(rdev, 64 * 1024);
7623
7624 r = r600_pcie_gart_init(rdev);
7625 if (r)
7626 return r;
7627
7628 rdev->accel_working = true;
7629 r = cik_startup(rdev);
7630 if (r) {
7631 dev_err(rdev->dev, "disabling GPU acceleration\n");
7632 cik_cp_fini(rdev);
7633 cik_sdma_fini(rdev);
7634 cik_irq_fini(rdev);
1fd11777 7635 sumo_rlc_fini(rdev);
963e81f9 7636 cik_mec_fini(rdev);
7bf94a2c
AD
7637 radeon_wb_fini(rdev);
7638 radeon_ib_pool_fini(rdev);
7639 radeon_vm_manager_fini(rdev);
7640 radeon_irq_kms_fini(rdev);
7641 cik_pcie_gart_fini(rdev);
7642 rdev->accel_working = false;
7643 }
7644
7645 /* Don't start up if the MC ucode is missing.
7646 * The default clocks and voltages before the MC ucode
7647 * is loaded are not suffient for advanced operations.
7648 */
7649 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7650 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7651 return -EINVAL;
7652 }
7653
7654 return 0;
7655}
7656
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 *
 * NOTE(review): ordering mirrors (reverse of) cik_startup(): engines
 * first, then IRQ/writeback, then GART/VRAM, then core object and
 * atombios teardown.  Keep it that way.
 */
void cik_fini(struct radeon_device *rdev)
{
	/* stop the command processors and DMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* disable powergating/clockgating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* the BIOS copy was kmalloc'd in radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
cd84a27d 7690
134b480f
AD
7691void dce8_program_fmt(struct drm_encoder *encoder)
7692{
7693 struct drm_device *dev = encoder->dev;
7694 struct radeon_device *rdev = dev->dev_private;
7695 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
7696 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
7697 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
7698 int bpc = 0;
7699 u32 tmp = 0;
6214bb74 7700 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
134b480f 7701
6214bb74
AD
7702 if (connector) {
7703 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
134b480f 7704 bpc = radeon_get_monitor_bpc(connector);
6214bb74
AD
7705 dither = radeon_connector->dither;
7706 }
134b480f
AD
7707
7708 /* LVDS/eDP FMT is set up by atom */
7709 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
7710 return;
7711
7712 /* not needed for analog */
7713 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
7714 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
7715 return;
7716
7717 if (bpc == 0)
7718 return;
7719
7720 switch (bpc) {
7721 case 6:
6214bb74 7722 if (dither == RADEON_FMT_DITHER_ENABLE)
134b480f
AD
7723 /* XXX sort out optimal dither settings */
7724 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7725 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
7726 else
7727 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
7728 break;
7729 case 8:
6214bb74 7730 if (dither == RADEON_FMT_DITHER_ENABLE)
134b480f
AD
7731 /* XXX sort out optimal dither settings */
7732 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7733 FMT_RGB_RANDOM_ENABLE |
7734 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
7735 else
7736 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
7737 break;
7738 case 10:
6214bb74 7739 if (dither == RADEON_FMT_DITHER_ENABLE)
134b480f
AD
7740 /* XXX sort out optimal dither settings */
7741 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7742 FMT_RGB_RANDOM_ENABLE |
7743 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
7744 else
7745 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
7746 break;
7747 default:
7748 /* not needed */
7749 break;
7750 }
7751
7752 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
7753}
7754
cd84a27d
AD
7755/* display watermark setup */
7756/**
7757 * dce8_line_buffer_adjust - Set up the line buffer
7758 *
7759 * @rdev: radeon_device pointer
7760 * @radeon_crtc: the selected display controller
7761 * @mode: the current display mode on the selected display
7762 * controller
7763 *
7764 * Setup up the line buffer allocation for
7765 * the selected display controller (CIK).
7766 * Returns the line buffer size in pixels.
7767 */
7768static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7769 struct radeon_crtc *radeon_crtc,
7770 struct drm_display_mode *mode)
7771{
bc01a8c7
AD
7772 u32 tmp, buffer_alloc, i;
7773 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
cd84a27d
AD
7774 /*
7775 * Line Buffer Setup
7776 * There are 6 line buffers, one for each display controllers.
7777 * There are 3 partitions per LB. Select the number of partitions
7778 * to enable based on the display width. For display widths larger
7779 * than 4096, you need use to use 2 display controllers and combine
7780 * them using the stereo blender.
7781 */
7782 if (radeon_crtc->base.enabled && mode) {
bc01a8c7 7783 if (mode->crtc_hdisplay < 1920) {
cd84a27d 7784 tmp = 1;
bc01a8c7
AD
7785 buffer_alloc = 2;
7786 } else if (mode->crtc_hdisplay < 2560) {
cd84a27d 7787 tmp = 2;
bc01a8c7
AD
7788 buffer_alloc = 2;
7789 } else if (mode->crtc_hdisplay < 4096) {
cd84a27d 7790 tmp = 0;
bc01a8c7
AD
7791 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7792 } else {
cd84a27d
AD
7793 DRM_DEBUG_KMS("Mode too big for LB!\n");
7794 tmp = 0;
bc01a8c7 7795 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
cd84a27d 7796 }
bc01a8c7 7797 } else {
cd84a27d 7798 tmp = 1;
bc01a8c7
AD
7799 buffer_alloc = 0;
7800 }
cd84a27d
AD
7801
7802 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7803 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7804
bc01a8c7
AD
7805 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7806 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7807 for (i = 0; i < rdev->usec_timeout; i++) {
7808 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7809 DMIF_BUFFERS_ALLOCATED_COMPLETED)
7810 break;
7811 udelay(1);
7812 }
7813
cd84a27d
AD
7814 if (radeon_crtc->base.enabled && mode) {
7815 switch (tmp) {
7816 case 0:
7817 default:
7818 return 4096 * 2;
7819 case 1:
7820 return 1920 * 2;
7821 case 2:
7822 return 2560 * 2;
7823 }
7824 }
7825
7826 /* controller not enabled, so no lb used */
7827 return 0;
7828}
7829
7830/**
7831 * cik_get_number_of_dram_channels - get the number of dram channels
7832 *
7833 * @rdev: radeon_device pointer
7834 *
7835 * Look up the number of video ram channels (CIK).
7836 * Used for display watermark bandwidth calculations
7837 * Returns the number of dram channels
7838 */
7839static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7840{
7841 u32 tmp = RREG32(MC_SHARED_CHMAP);
7842
7843 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7844 case 0:
7845 default:
7846 return 1;
7847 case 1:
7848 return 2;
7849 case 2:
7850 return 4;
7851 case 3:
7852 return 8;
7853 case 4:
7854 return 3;
7855 case 5:
7856 return 6;
7857 case 6:
7858 return 10;
7859 case 7:
7860 return 12;
7861 case 8:
7862 return 16;
7863 }
7864}
7865
/* dce8_wm_params - inputs for the DCE8 display watermark/bandwidth
 * calculations below; filled in per-crtc by the bandwidth update code.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};
7881
7882/**
7883 * dce8_dram_bandwidth - get the dram bandwidth
7884 *
7885 * @wm: watermark calculation data
7886 *
7887 * Calculate the raw dram bandwidth (CIK).
7888 * Used for display watermark bandwidth calculations
7889 * Returns the dram bandwidth in MBytes/s
7890 */
7891static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7892{
7893 /* Calculate raw DRAM Bandwidth */
7894 fixed20_12 dram_efficiency; /* 0.7 */
7895 fixed20_12 yclk, dram_channels, bandwidth;
7896 fixed20_12 a;
7897
7898 a.full = dfixed_const(1000);
7899 yclk.full = dfixed_const(wm->yclk);
7900 yclk.full = dfixed_div(yclk, a);
7901 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7902 a.full = dfixed_const(10);
7903 dram_efficiency.full = dfixed_const(7);
7904 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7905 bandwidth.full = dfixed_mul(dram_channels, yclk);
7906 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7907
7908 return dfixed_trunc(bandwidth);
7909}
7910
7911/**
7912 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7913 *
7914 * @wm: watermark calculation data
7915 *
7916 * Calculate the dram bandwidth used for display (CIK).
7917 * Used for display watermark bandwidth calculations
7918 * Returns the dram bandwidth for display in MBytes/s
7919 */
7920static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7921{
7922 /* Calculate DRAM Bandwidth and the part allocated to display. */
7923 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7924 fixed20_12 yclk, dram_channels, bandwidth;
7925 fixed20_12 a;
7926
7927 a.full = dfixed_const(1000);
7928 yclk.full = dfixed_const(wm->yclk);
7929 yclk.full = dfixed_div(yclk, a);
7930 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7931 a.full = dfixed_const(10);
7932 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7933 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7934 bandwidth.full = dfixed_mul(dram_channels, yclk);
7935 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7936
7937 return dfixed_trunc(bandwidth);
7938}
7939
7940/**
7941 * dce8_data_return_bandwidth - get the data return bandwidth
7942 *
7943 * @wm: watermark calculation data
7944 *
7945 * Calculate the data return bandwidth used for display (CIK).
7946 * Used for display watermark bandwidth calculations
7947 * Returns the data return bandwidth in MBytes/s
7948 */
7949static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7950{
7951 /* Calculate the display Data return Bandwidth */
7952 fixed20_12 return_efficiency; /* 0.8 */
7953 fixed20_12 sclk, bandwidth;
7954 fixed20_12 a;
7955
7956 a.full = dfixed_const(1000);
7957 sclk.full = dfixed_const(wm->sclk);
7958 sclk.full = dfixed_div(sclk, a);
7959 a.full = dfixed_const(10);
7960 return_efficiency.full = dfixed_const(8);
7961 return_efficiency.full = dfixed_div(return_efficiency, a);
7962 a.full = dfixed_const(32);
7963 bandwidth.full = dfixed_mul(a, sclk);
7964 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7965
7966 return dfixed_trunc(bandwidth);
7967}
7968
7969/**
7970 * dce8_dmif_request_bandwidth - get the dmif bandwidth
7971 *
7972 * @wm: watermark calculation data
7973 *
7974 * Calculate the dmif bandwidth used for display (CIK).
7975 * Used for display watermark bandwidth calculations
7976 * Returns the dmif bandwidth in MBytes/s
7977 */
7978static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7979{
7980 /* Calculate the DMIF Request Bandwidth */
7981 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7982 fixed20_12 disp_clk, bandwidth;
7983 fixed20_12 a, b;
7984
7985 a.full = dfixed_const(1000);
7986 disp_clk.full = dfixed_const(wm->disp_clk);
7987 disp_clk.full = dfixed_div(disp_clk, a);
7988 a.full = dfixed_const(32);
7989 b.full = dfixed_mul(a, disp_clk);
7990
7991 a.full = dfixed_const(10);
7992 disp_clk_request_efficiency.full = dfixed_const(8);
7993 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7994
7995 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7996
7997 return dfixed_trunc(bandwidth);
7998}
7999
8000/**
8001 * dce8_available_bandwidth - get the min available bandwidth
8002 *
8003 * @wm: watermark calculation data
8004 *
8005 * Calculate the min available bandwidth used for display (CIK).
8006 * Used for display watermark bandwidth calculations
8007 * Returns the min available bandwidth in MBytes/s
8008 */
8009static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8010{
8011 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8012 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8013 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8014 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8015
8016 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8017}
8018
8019/**
8020 * dce8_average_bandwidth - get the average available bandwidth
8021 *
8022 * @wm: watermark calculation data
8023 *
8024 * Calculate the average available bandwidth used for display (CIK).
8025 * Used for display watermark bandwidth calculations
8026 * Returns the average available bandwidth in MBytes/s
8027 */
8028static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8029{
8030 /* Calculate the display mode Average Bandwidth
8031 * DisplayMode should contain the source and destination dimensions,
8032 * timing, etc.
8033 */
8034 fixed20_12 bpp;
8035 fixed20_12 line_time;
8036 fixed20_12 src_width;
8037 fixed20_12 bandwidth;
8038 fixed20_12 a;
8039
8040 a.full = dfixed_const(1000);
8041 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8042 line_time.full = dfixed_div(line_time, a);
8043 bpp.full = dfixed_const(wm->bytes_per_pixel);
8044 src_width.full = dfixed_const(wm->src_width);
8045 bandwidth.full = dfixed_mul(src_width, bpp);
8046 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8047 bandwidth.full = dfixed_div(bandwidth, line_time);
8048
8049 return dfixed_trunc(bandwidth);
8050}
8051
8052/**
8053 * dce8_latency_watermark - get the latency watermark
8054 *
8055 * @wm: watermark calculation data
8056 *
8057 * Calculate the latency watermark (CIK).
8058 * Used for display watermark bandwidth calculations
8059 * Returns the latency watermark in ns
8060 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* worst-case time for one 512-byte*8 chunk at the available bw */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* account for chunk + cursor traffic generated by the other heads */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* scaling/interlacing determines how many source lines feed one
	 * destination line (2 or 4) */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / time to fetch (mc_latency + 512) cycles worth
	 * at disp_clk, i.e. the rate the dmif buffer can absorb data */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk (MHz) * bytes_per_pixel: the rate the line buffer
	 * consumes data */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill rate is bounded by both fetch and drain rates */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can't be filled within the active period, the excess
	 * adds to the latency watermark */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
8123
8124/**
8125 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8126 * average and available dram bandwidth
8127 *
8128 * @wm: watermark calculation data
8129 *
8130 * Check if the display average bandwidth fits in the display
8131 * dram bandwidth (CIK).
8132 * Used for display watermark bandwidth calculations
8133 * Returns true if the display fits, false if not.
8134 */
8135static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8136{
8137 if (dce8_average_bandwidth(wm) <=
8138 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8139 return true;
8140 else
8141 return false;
8142}
8143
8144/**
8145 * dce8_average_bandwidth_vs_available_bandwidth - check
8146 * average and available bandwidth
8147 *
8148 * @wm: watermark calculation data
8149 *
8150 * Check if the display average bandwidth fits in the display
8151 * available bandwidth (CIK).
8152 * Used for display watermark bandwidth calculations
8153 * Returns true if the display fits, false if not.
8154 */
8155static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8156{
8157 if (dce8_average_bandwidth(wm) <=
8158 (dce8_available_bandwidth(wm) / wm->num_heads))
8159 return true;
8160 else
8161 return false;
8162}
8163
8164/**
8165 * dce8_check_latency_hiding - check latency hiding
8166 *
8167 * @wm: watermark calculation data
8168 *
8169 * Check latency hiding (CIK).
8170 * Used for display watermark bandwidth calculations
8171 * Returns true if the display fits, false if not.
8172 */
8173static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8174{
8175 u32 lb_partitions = wm->lb_size / wm->src_width;
8176 u32 line_time = wm->active_time + wm->blank_time;
8177 u32 latency_tolerant_lines;
8178 u32 latency_hiding;
8179 fixed20_12 a;
8180
8181 a.full = dfixed_const(1);
8182 if (wm->vsc.full > a.full)
8183 latency_tolerant_lines = 1;
8184 else {
8185 if (lb_partitions <= (wm->vtaps + 1))
8186 latency_tolerant_lines = 1;
8187 else
8188 latency_tolerant_lines = 2;
8189 }
8190
8191 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8192
8193 if (dce8_latency_watermark(wm) <= latency_hiding)
8194 return true;
8195 else
8196 return false;
8197}
8198
8199/**
8200 * dce8_program_watermarks - program display watermarks
8201 *
8202 * @rdev: radeon_device pointer
8203 * @radeon_crtc: the selected display controller
8204 * @lb_size: line buffer size
8205 * @num_heads: number of display controllers in use
8206 *
8207 * Calculate and program the display watermarks for the
8208 * selected display controller (CIK).
8209 */
8210static void dce8_program_watermarks(struct radeon_device *rdev,
8211 struct radeon_crtc *radeon_crtc,
8212 u32 lb_size, u32 num_heads)
8213{
8214 struct drm_display_mode *mode = &radeon_crtc->base.mode;
58ea2dea 8215 struct dce8_wm_params wm_low, wm_high;
cd84a27d
AD
8216 u32 pixel_period;
8217 u32 line_time = 0;
8218 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8219 u32 tmp, wm_mask;
8220
8221 if (radeon_crtc->base.enabled && num_heads && mode) {
8222 pixel_period = 1000000 / (u32)mode->clock;
8223 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8224
58ea2dea
AD
8225 /* watermark for high clocks */
8226 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8227 rdev->pm.dpm_enabled) {
8228 wm_high.yclk =
8229 radeon_dpm_get_mclk(rdev, false) * 10;
8230 wm_high.sclk =
8231 radeon_dpm_get_sclk(rdev, false) * 10;
8232 } else {
8233 wm_high.yclk = rdev->pm.current_mclk * 10;
8234 wm_high.sclk = rdev->pm.current_sclk * 10;
8235 }
8236
8237 wm_high.disp_clk = mode->clock;
8238 wm_high.src_width = mode->crtc_hdisplay;
8239 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8240 wm_high.blank_time = line_time - wm_high.active_time;
8241 wm_high.interlaced = false;
cd84a27d 8242 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
58ea2dea
AD
8243 wm_high.interlaced = true;
8244 wm_high.vsc = radeon_crtc->vsc;
8245 wm_high.vtaps = 1;
cd84a27d 8246 if (radeon_crtc->rmx_type != RMX_OFF)
58ea2dea
AD
8247 wm_high.vtaps = 2;
8248 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8249 wm_high.lb_size = lb_size;
8250 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8251 wm_high.num_heads = num_heads;
cd84a27d
AD
8252
8253 /* set for high clocks */
58ea2dea
AD
8254 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8255
8256 /* possibly force display priority to high */
8257 /* should really do this at mode validation time... */
8258 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8259 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8260 !dce8_check_latency_hiding(&wm_high) ||
8261 (rdev->disp_priority == 2)) {
8262 DRM_DEBUG_KMS("force priority to high\n");
8263 }
8264
8265 /* watermark for low clocks */
8266 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8267 rdev->pm.dpm_enabled) {
8268 wm_low.yclk =
8269 radeon_dpm_get_mclk(rdev, true) * 10;
8270 wm_low.sclk =
8271 radeon_dpm_get_sclk(rdev, true) * 10;
8272 } else {
8273 wm_low.yclk = rdev->pm.current_mclk * 10;
8274 wm_low.sclk = rdev->pm.current_sclk * 10;
8275 }
8276
8277 wm_low.disp_clk = mode->clock;
8278 wm_low.src_width = mode->crtc_hdisplay;
8279 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8280 wm_low.blank_time = line_time - wm_low.active_time;
8281 wm_low.interlaced = false;
8282 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8283 wm_low.interlaced = true;
8284 wm_low.vsc = radeon_crtc->vsc;
8285 wm_low.vtaps = 1;
8286 if (radeon_crtc->rmx_type != RMX_OFF)
8287 wm_low.vtaps = 2;
8288 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8289 wm_low.lb_size = lb_size;
8290 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8291 wm_low.num_heads = num_heads;
8292
cd84a27d 8293 /* set for low clocks */
58ea2dea 8294 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
cd84a27d
AD
8295
8296 /* possibly force display priority to high */
8297 /* should really do this at mode validation time... */
58ea2dea
AD
8298 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8299 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8300 !dce8_check_latency_hiding(&wm_low) ||
cd84a27d
AD
8301 (rdev->disp_priority == 2)) {
8302 DRM_DEBUG_KMS("force priority to high\n");
8303 }
8304 }
8305
8306 /* select wm A */
8307 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8308 tmp = wm_mask;
8309 tmp &= ~LATENCY_WATERMARK_MASK(3);
8310 tmp |= LATENCY_WATERMARK_MASK(1);
8311 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8312 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8313 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8314 LATENCY_HIGH_WATERMARK(line_time)));
8315 /* select wm B */
8316 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8317 tmp &= ~LATENCY_WATERMARK_MASK(3);
8318 tmp |= LATENCY_WATERMARK_MASK(2);
8319 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8320 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8321 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8322 LATENCY_HIGH_WATERMARK(line_time)));
8323 /* restore original selection */
8324 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
58ea2dea
AD
8325
8326 /* save values for DPM */
8327 radeon_crtc->line_time = line_time;
8328 radeon_crtc->wm_high = latency_watermark_a;
8329 radeon_crtc->wm_low = latency_watermark_b;
cd84a27d
AD
8330}
8331
8332/**
8333 * dce8_bandwidth_update - program display watermarks
8334 *
8335 * @rdev: radeon_device pointer
8336 *
8337 * Calculate and program the display watermarks and line
8338 * buffer allocation (CIK).
8339 */
8340void dce8_bandwidth_update(struct radeon_device *rdev)
8341{
8342 struct drm_display_mode *mode = NULL;
8343 u32 num_heads = 0, lb_size;
8344 int i;
8345
8346 radeon_update_display_priority(rdev);
8347
8348 for (i = 0; i < rdev->num_crtc; i++) {
8349 if (rdev->mode_info.crtcs[i]->base.enabled)
8350 num_heads++;
8351 }
8352 for (i = 0; i < rdev->num_crtc; i++) {
8353 mode = &rdev->mode_info.crtcs[i]->base.mode;
8354 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8355 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8356 }
8357}
44fa346f
AD
8358
8359/**
8360 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8361 *
8362 * @rdev: radeon_device pointer
8363 *
8364 * Fetches a GPU clock counter snapshot (SI).
8365 * Returns the 64 bit clock counter snapshot.
8366 */
8367uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8368{
8369 uint64_t clock;
8370
8371 mutex_lock(&rdev->gpu_clock_mutex);
8372 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8373 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8374 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8375 mutex_unlock(&rdev->gpu_clock_mutex);
8376 return clock;
8377}
8378
87167bb1
CK
8379static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8380 u32 cntl_reg, u32 status_reg)
8381{
8382 int r, i;
8383 struct atom_clock_dividers dividers;
8384 uint32_t tmp;
8385
8386 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8387 clock, false, &dividers);
8388 if (r)
8389 return r;
8390
8391 tmp = RREG32_SMC(cntl_reg);
8392 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8393 tmp |= dividers.post_divider;
8394 WREG32_SMC(cntl_reg, tmp);
8395
8396 for (i = 0; i < 100; i++) {
8397 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8398 break;
8399 mdelay(10);
8400 }
8401 if (i == 100)
8402 return -ETIMEDOUT;
8403
8404 return 0;
8405}
8406
8407int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8408{
8409 int r = 0;
8410
8411 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8412 if (r)
8413 return r;
8414
8415 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8416 return r;
8417}
8418
8a7cd276 8419static void cik_pcie_gen3_enable(struct radeon_device *rdev)
87167bb1 8420{
8a7cd276
AD
8421 struct pci_dev *root = rdev->pdev->bus->self;
8422 int bridge_pos, gpu_pos;
8423 u32 speed_cntl, mask, current_data_rate;
8424 int ret, i;
8425 u16 tmp16;
87167bb1 8426
8a7cd276
AD
8427 if (radeon_pcie_gen2 == 0)
8428 return;
87167bb1 8429
8a7cd276
AD
8430 if (rdev->flags & RADEON_IS_IGP)
8431 return;
87167bb1 8432
8a7cd276
AD
8433 if (!(rdev->flags & RADEON_IS_PCIE))
8434 return;
87167bb1 8435
8a7cd276
AD
8436 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8437 if (ret != 0)
8438 return;
87167bb1 8439
8a7cd276
AD
8440 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8441 return;
87167bb1 8442
8a7cd276
AD
8443 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8444 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8445 LC_CURRENT_DATA_RATE_SHIFT;
8446 if (mask & DRM_PCIE_SPEED_80) {
8447 if (current_data_rate == 2) {
8448 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8449 return;
8450 }
8451 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8452 } else if (mask & DRM_PCIE_SPEED_50) {
8453 if (current_data_rate == 1) {
8454 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8455 return;
8456 }
8457 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8458 }
87167bb1 8459
8a7cd276
AD
8460 bridge_pos = pci_pcie_cap(root);
8461 if (!bridge_pos)
8462 return;
8463
8464 gpu_pos = pci_pcie_cap(rdev->pdev);
8465 if (!gpu_pos)
8466 return;
8467
8468 if (mask & DRM_PCIE_SPEED_80) {
8469 /* re-try equalization if gen3 is not already enabled */
8470 if (current_data_rate != 2) {
8471 u16 bridge_cfg, gpu_cfg;
8472 u16 bridge_cfg2, gpu_cfg2;
8473 u32 max_lw, current_lw, tmp;
8474
8475 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8476 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8477
8478 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8479 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8480
8481 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8482 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8483
8484 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8485 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8486 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8487
8488 if (current_lw < max_lw) {
8489 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8490 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8491 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8492 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8493 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8494 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8495 }
8496 }
8497
8498 for (i = 0; i < 10; i++) {
8499 /* check status */
8500 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8501 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8502 break;
8503
8504 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8505 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8506
8507 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8508 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8509
8510 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8511 tmp |= LC_SET_QUIESCE;
8512 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8513
8514 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8515 tmp |= LC_REDO_EQ;
8516 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8517
8518 mdelay(100);
8519
8520 /* linkctl */
8521 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8522 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8523 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8524 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8525
8526 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8527 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8528 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8529 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8530
8531 /* linkctl2 */
8532 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8533 tmp16 &= ~((1 << 4) | (7 << 9));
8534 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8535 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8536
8537 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8538 tmp16 &= ~((1 << 4) | (7 << 9));
8539 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8540 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8541
8542 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8543 tmp &= ~LC_SET_QUIESCE;
8544 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8545 }
8546 }
8547 }
8548
8549 /* set the link speed */
8550 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8551 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8552 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8553
8554 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8555 tmp16 &= ~0xf;
8556 if (mask & DRM_PCIE_SPEED_80)
8557 tmp16 |= 3; /* gen3 */
8558 else if (mask & DRM_PCIE_SPEED_50)
8559 tmp16 |= 2; /* gen2 */
8560 else
8561 tmp16 |= 1; /* gen1 */
8562 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8563
8564 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8565 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8566 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8567
8568 for (i = 0; i < rdev->usec_timeout; i++) {
8569 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8570 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8571 break;
8572 udelay(1);
8573 }
8574}
7235711a
AD
8575
/* Program PCIE ASPM (Active State Power Management) for CIK dGPUs.
 * Configures L0s/L1 inactivity timers and, when L1 is enabled, PLL
 * power-down and (if the root port supports clock PM) CLKREQ-based
 * clocking.  Skipped for IGPs, non-PCIE parts, and when radeon_aspm=0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* throughout: read-modify-write, only writing back on change */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* program L0s/L1 inactivity timers per the disable_* policy above */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs to power down in L1 on both pifs */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ is only usable if the root port supports clock PM */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch thermal monitor / misc clocks off the
				 * refclk so it can be gated via CLKREQ */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	/* if the link partner advertised the max N_FTS, L0s exit timing
	 * allows dropping the L0s inactivity timeout */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}