drm/radeon: add KB/KV to r600_is_internal_thermal_sensor
[linux-block.git] / drivers / gpu / drm / radeon / cik.c
CommitLineData
8cc1a532
AD
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
8cc1a532
AD
25#include <linux/slab.h>
26#include <linux/module.h>
27#include "drmP.h"
28#include "radeon.h"
6f2043ce 29#include "radeon_asic.h"
8cc1a532
AD
30#include "cikd.h"
31#include "atom.h"
841cf442 32#include "cik_blit_shaders.h"
8c68e393 33#include "radeon_ucode.h"
22c775ce 34#include "clearstate_ci.h"
02c81327
AD
35
36MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
21a93e13 42MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
02c81327
AD
43MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
44MODULE_FIRMWARE("radeon/KAVERI_me.bin");
45MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
46MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
47MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
21a93e13 48MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
02c81327
AD
49MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
50MODULE_FIRMWARE("radeon/KABINI_me.bin");
51MODULE_FIRMWARE("radeon/KABINI_ce.bin");
52MODULE_FIRMWARE("radeon/KABINI_mec.bin");
53MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
21a93e13 54MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
02c81327 55
a59781bb
AD
56extern int r600_ih_ring_alloc(struct radeon_device *rdev);
57extern void r600_ih_ring_fini(struct radeon_device *rdev);
6f2043ce
AD
58extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
59extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
cc066715 60extern bool evergreen_is_display_hung(struct radeon_device *rdev);
1fd11777
AD
61extern void sumo_rlc_fini(struct radeon_device *rdev);
62extern int sumo_rlc_init(struct radeon_device *rdev);
1c49165d 63extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
866d83de 64extern void si_rlc_reset(struct radeon_device *rdev);
22c775ce 65extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
cc066715 66static void cik_rlc_stop(struct radeon_device *rdev);
8a7cd276 67static void cik_pcie_gen3_enable(struct radeon_device *rdev);
7235711a 68static void cik_program_aspm(struct radeon_device *rdev);
22c775ce
AD
69static void cik_init_pg(struct radeon_device *rdev);
70static void cik_init_cg(struct radeon_device *rdev);
6f2043ce 71
6e2c3c0a
AD
72/*
73 * Indirect registers accessor
74 */
75u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
76{
77 u32 r;
78
79 WREG32(PCIE_INDEX, reg);
80 (void)RREG32(PCIE_INDEX);
81 r = RREG32(PCIE_DATA);
82 return r;
83}
84
85void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
86{
87 WREG32(PCIE_INDEX, reg);
88 (void)RREG32(PCIE_INDEX);
89 WREG32(PCIE_DATA, v);
90 (void)RREG32(PCIE_DATA);
91}
92
22c775ce
AD
93static const u32 spectre_rlc_save_restore_register_list[] =
94{
95 (0x0e00 << 16) | (0xc12c >> 2),
96 0x00000000,
97 (0x0e00 << 16) | (0xc140 >> 2),
98 0x00000000,
99 (0x0e00 << 16) | (0xc150 >> 2),
100 0x00000000,
101 (0x0e00 << 16) | (0xc15c >> 2),
102 0x00000000,
103 (0x0e00 << 16) | (0xc168 >> 2),
104 0x00000000,
105 (0x0e00 << 16) | (0xc170 >> 2),
106 0x00000000,
107 (0x0e00 << 16) | (0xc178 >> 2),
108 0x00000000,
109 (0x0e00 << 16) | (0xc204 >> 2),
110 0x00000000,
111 (0x0e00 << 16) | (0xc2b4 >> 2),
112 0x00000000,
113 (0x0e00 << 16) | (0xc2b8 >> 2),
114 0x00000000,
115 (0x0e00 << 16) | (0xc2bc >> 2),
116 0x00000000,
117 (0x0e00 << 16) | (0xc2c0 >> 2),
118 0x00000000,
119 (0x0e00 << 16) | (0x8228 >> 2),
120 0x00000000,
121 (0x0e00 << 16) | (0x829c >> 2),
122 0x00000000,
123 (0x0e00 << 16) | (0x869c >> 2),
124 0x00000000,
125 (0x0600 << 16) | (0x98f4 >> 2),
126 0x00000000,
127 (0x0e00 << 16) | (0x98f8 >> 2),
128 0x00000000,
129 (0x0e00 << 16) | (0x9900 >> 2),
130 0x00000000,
131 (0x0e00 << 16) | (0xc260 >> 2),
132 0x00000000,
133 (0x0e00 << 16) | (0x90e8 >> 2),
134 0x00000000,
135 (0x0e00 << 16) | (0x3c000 >> 2),
136 0x00000000,
137 (0x0e00 << 16) | (0x3c00c >> 2),
138 0x00000000,
139 (0x0e00 << 16) | (0x8c1c >> 2),
140 0x00000000,
141 (0x0e00 << 16) | (0x9700 >> 2),
142 0x00000000,
143 (0x0e00 << 16) | (0xcd20 >> 2),
144 0x00000000,
145 (0x4e00 << 16) | (0xcd20 >> 2),
146 0x00000000,
147 (0x5e00 << 16) | (0xcd20 >> 2),
148 0x00000000,
149 (0x6e00 << 16) | (0xcd20 >> 2),
150 0x00000000,
151 (0x7e00 << 16) | (0xcd20 >> 2),
152 0x00000000,
153 (0x8e00 << 16) | (0xcd20 >> 2),
154 0x00000000,
155 (0x9e00 << 16) | (0xcd20 >> 2),
156 0x00000000,
157 (0xae00 << 16) | (0xcd20 >> 2),
158 0x00000000,
159 (0xbe00 << 16) | (0xcd20 >> 2),
160 0x00000000,
161 (0x0e00 << 16) | (0x89bc >> 2),
162 0x00000000,
163 (0x0e00 << 16) | (0x8900 >> 2),
164 0x00000000,
165 0x3,
166 (0x0e00 << 16) | (0xc130 >> 2),
167 0x00000000,
168 (0x0e00 << 16) | (0xc134 >> 2),
169 0x00000000,
170 (0x0e00 << 16) | (0xc1fc >> 2),
171 0x00000000,
172 (0x0e00 << 16) | (0xc208 >> 2),
173 0x00000000,
174 (0x0e00 << 16) | (0xc264 >> 2),
175 0x00000000,
176 (0x0e00 << 16) | (0xc268 >> 2),
177 0x00000000,
178 (0x0e00 << 16) | (0xc26c >> 2),
179 0x00000000,
180 (0x0e00 << 16) | (0xc270 >> 2),
181 0x00000000,
182 (0x0e00 << 16) | (0xc274 >> 2),
183 0x00000000,
184 (0x0e00 << 16) | (0xc278 >> 2),
185 0x00000000,
186 (0x0e00 << 16) | (0xc27c >> 2),
187 0x00000000,
188 (0x0e00 << 16) | (0xc280 >> 2),
189 0x00000000,
190 (0x0e00 << 16) | (0xc284 >> 2),
191 0x00000000,
192 (0x0e00 << 16) | (0xc288 >> 2),
193 0x00000000,
194 (0x0e00 << 16) | (0xc28c >> 2),
195 0x00000000,
196 (0x0e00 << 16) | (0xc290 >> 2),
197 0x00000000,
198 (0x0e00 << 16) | (0xc294 >> 2),
199 0x00000000,
200 (0x0e00 << 16) | (0xc298 >> 2),
201 0x00000000,
202 (0x0e00 << 16) | (0xc29c >> 2),
203 0x00000000,
204 (0x0e00 << 16) | (0xc2a0 >> 2),
205 0x00000000,
206 (0x0e00 << 16) | (0xc2a4 >> 2),
207 0x00000000,
208 (0x0e00 << 16) | (0xc2a8 >> 2),
209 0x00000000,
210 (0x0e00 << 16) | (0xc2ac >> 2),
211 0x00000000,
212 (0x0e00 << 16) | (0xc2b0 >> 2),
213 0x00000000,
214 (0x0e00 << 16) | (0x301d0 >> 2),
215 0x00000000,
216 (0x0e00 << 16) | (0x30238 >> 2),
217 0x00000000,
218 (0x0e00 << 16) | (0x30250 >> 2),
219 0x00000000,
220 (0x0e00 << 16) | (0x30254 >> 2),
221 0x00000000,
222 (0x0e00 << 16) | (0x30258 >> 2),
223 0x00000000,
224 (0x0e00 << 16) | (0x3025c >> 2),
225 0x00000000,
226 (0x4e00 << 16) | (0xc900 >> 2),
227 0x00000000,
228 (0x5e00 << 16) | (0xc900 >> 2),
229 0x00000000,
230 (0x6e00 << 16) | (0xc900 >> 2),
231 0x00000000,
232 (0x7e00 << 16) | (0xc900 >> 2),
233 0x00000000,
234 (0x8e00 << 16) | (0xc900 >> 2),
235 0x00000000,
236 (0x9e00 << 16) | (0xc900 >> 2),
237 0x00000000,
238 (0xae00 << 16) | (0xc900 >> 2),
239 0x00000000,
240 (0xbe00 << 16) | (0xc900 >> 2),
241 0x00000000,
242 (0x4e00 << 16) | (0xc904 >> 2),
243 0x00000000,
244 (0x5e00 << 16) | (0xc904 >> 2),
245 0x00000000,
246 (0x6e00 << 16) | (0xc904 >> 2),
247 0x00000000,
248 (0x7e00 << 16) | (0xc904 >> 2),
249 0x00000000,
250 (0x8e00 << 16) | (0xc904 >> 2),
251 0x00000000,
252 (0x9e00 << 16) | (0xc904 >> 2),
253 0x00000000,
254 (0xae00 << 16) | (0xc904 >> 2),
255 0x00000000,
256 (0xbe00 << 16) | (0xc904 >> 2),
257 0x00000000,
258 (0x4e00 << 16) | (0xc908 >> 2),
259 0x00000000,
260 (0x5e00 << 16) | (0xc908 >> 2),
261 0x00000000,
262 (0x6e00 << 16) | (0xc908 >> 2),
263 0x00000000,
264 (0x7e00 << 16) | (0xc908 >> 2),
265 0x00000000,
266 (0x8e00 << 16) | (0xc908 >> 2),
267 0x00000000,
268 (0x9e00 << 16) | (0xc908 >> 2),
269 0x00000000,
270 (0xae00 << 16) | (0xc908 >> 2),
271 0x00000000,
272 (0xbe00 << 16) | (0xc908 >> 2),
273 0x00000000,
274 (0x4e00 << 16) | (0xc90c >> 2),
275 0x00000000,
276 (0x5e00 << 16) | (0xc90c >> 2),
277 0x00000000,
278 (0x6e00 << 16) | (0xc90c >> 2),
279 0x00000000,
280 (0x7e00 << 16) | (0xc90c >> 2),
281 0x00000000,
282 (0x8e00 << 16) | (0xc90c >> 2),
283 0x00000000,
284 (0x9e00 << 16) | (0xc90c >> 2),
285 0x00000000,
286 (0xae00 << 16) | (0xc90c >> 2),
287 0x00000000,
288 (0xbe00 << 16) | (0xc90c >> 2),
289 0x00000000,
290 (0x4e00 << 16) | (0xc910 >> 2),
291 0x00000000,
292 (0x5e00 << 16) | (0xc910 >> 2),
293 0x00000000,
294 (0x6e00 << 16) | (0xc910 >> 2),
295 0x00000000,
296 (0x7e00 << 16) | (0xc910 >> 2),
297 0x00000000,
298 (0x8e00 << 16) | (0xc910 >> 2),
299 0x00000000,
300 (0x9e00 << 16) | (0xc910 >> 2),
301 0x00000000,
302 (0xae00 << 16) | (0xc910 >> 2),
303 0x00000000,
304 (0xbe00 << 16) | (0xc910 >> 2),
305 0x00000000,
306 (0x0e00 << 16) | (0xc99c >> 2),
307 0x00000000,
308 (0x0e00 << 16) | (0x9834 >> 2),
309 0x00000000,
310 (0x0000 << 16) | (0x30f00 >> 2),
311 0x00000000,
312 (0x0001 << 16) | (0x30f00 >> 2),
313 0x00000000,
314 (0x0000 << 16) | (0x30f04 >> 2),
315 0x00000000,
316 (0x0001 << 16) | (0x30f04 >> 2),
317 0x00000000,
318 (0x0000 << 16) | (0x30f08 >> 2),
319 0x00000000,
320 (0x0001 << 16) | (0x30f08 >> 2),
321 0x00000000,
322 (0x0000 << 16) | (0x30f0c >> 2),
323 0x00000000,
324 (0x0001 << 16) | (0x30f0c >> 2),
325 0x00000000,
326 (0x0600 << 16) | (0x9b7c >> 2),
327 0x00000000,
328 (0x0e00 << 16) | (0x8a14 >> 2),
329 0x00000000,
330 (0x0e00 << 16) | (0x8a18 >> 2),
331 0x00000000,
332 (0x0600 << 16) | (0x30a00 >> 2),
333 0x00000000,
334 (0x0e00 << 16) | (0x8bf0 >> 2),
335 0x00000000,
336 (0x0e00 << 16) | (0x8bcc >> 2),
337 0x00000000,
338 (0x0e00 << 16) | (0x8b24 >> 2),
339 0x00000000,
340 (0x0e00 << 16) | (0x30a04 >> 2),
341 0x00000000,
342 (0x0600 << 16) | (0x30a10 >> 2),
343 0x00000000,
344 (0x0600 << 16) | (0x30a14 >> 2),
345 0x00000000,
346 (0x0600 << 16) | (0x30a18 >> 2),
347 0x00000000,
348 (0x0600 << 16) | (0x30a2c >> 2),
349 0x00000000,
350 (0x0e00 << 16) | (0xc700 >> 2),
351 0x00000000,
352 (0x0e00 << 16) | (0xc704 >> 2),
353 0x00000000,
354 (0x0e00 << 16) | (0xc708 >> 2),
355 0x00000000,
356 (0x0e00 << 16) | (0xc768 >> 2),
357 0x00000000,
358 (0x0400 << 16) | (0xc770 >> 2),
359 0x00000000,
360 (0x0400 << 16) | (0xc774 >> 2),
361 0x00000000,
362 (0x0400 << 16) | (0xc778 >> 2),
363 0x00000000,
364 (0x0400 << 16) | (0xc77c >> 2),
365 0x00000000,
366 (0x0400 << 16) | (0xc780 >> 2),
367 0x00000000,
368 (0x0400 << 16) | (0xc784 >> 2),
369 0x00000000,
370 (0x0400 << 16) | (0xc788 >> 2),
371 0x00000000,
372 (0x0400 << 16) | (0xc78c >> 2),
373 0x00000000,
374 (0x0400 << 16) | (0xc798 >> 2),
375 0x00000000,
376 (0x0400 << 16) | (0xc79c >> 2),
377 0x00000000,
378 (0x0400 << 16) | (0xc7a0 >> 2),
379 0x00000000,
380 (0x0400 << 16) | (0xc7a4 >> 2),
381 0x00000000,
382 (0x0400 << 16) | (0xc7a8 >> 2),
383 0x00000000,
384 (0x0400 << 16) | (0xc7ac >> 2),
385 0x00000000,
386 (0x0400 << 16) | (0xc7b0 >> 2),
387 0x00000000,
388 (0x0400 << 16) | (0xc7b4 >> 2),
389 0x00000000,
390 (0x0e00 << 16) | (0x9100 >> 2),
391 0x00000000,
392 (0x0e00 << 16) | (0x3c010 >> 2),
393 0x00000000,
394 (0x0e00 << 16) | (0x92a8 >> 2),
395 0x00000000,
396 (0x0e00 << 16) | (0x92ac >> 2),
397 0x00000000,
398 (0x0e00 << 16) | (0x92b4 >> 2),
399 0x00000000,
400 (0x0e00 << 16) | (0x92b8 >> 2),
401 0x00000000,
402 (0x0e00 << 16) | (0x92bc >> 2),
403 0x00000000,
404 (0x0e00 << 16) | (0x92c0 >> 2),
405 0x00000000,
406 (0x0e00 << 16) | (0x92c4 >> 2),
407 0x00000000,
408 (0x0e00 << 16) | (0x92c8 >> 2),
409 0x00000000,
410 (0x0e00 << 16) | (0x92cc >> 2),
411 0x00000000,
412 (0x0e00 << 16) | (0x92d0 >> 2),
413 0x00000000,
414 (0x0e00 << 16) | (0x8c00 >> 2),
415 0x00000000,
416 (0x0e00 << 16) | (0x8c04 >> 2),
417 0x00000000,
418 (0x0e00 << 16) | (0x8c20 >> 2),
419 0x00000000,
420 (0x0e00 << 16) | (0x8c38 >> 2),
421 0x00000000,
422 (0x0e00 << 16) | (0x8c3c >> 2),
423 0x00000000,
424 (0x0e00 << 16) | (0xae00 >> 2),
425 0x00000000,
426 (0x0e00 << 16) | (0x9604 >> 2),
427 0x00000000,
428 (0x0e00 << 16) | (0xac08 >> 2),
429 0x00000000,
430 (0x0e00 << 16) | (0xac0c >> 2),
431 0x00000000,
432 (0x0e00 << 16) | (0xac10 >> 2),
433 0x00000000,
434 (0x0e00 << 16) | (0xac14 >> 2),
435 0x00000000,
436 (0x0e00 << 16) | (0xac58 >> 2),
437 0x00000000,
438 (0x0e00 << 16) | (0xac68 >> 2),
439 0x00000000,
440 (0x0e00 << 16) | (0xac6c >> 2),
441 0x00000000,
442 (0x0e00 << 16) | (0xac70 >> 2),
443 0x00000000,
444 (0x0e00 << 16) | (0xac74 >> 2),
445 0x00000000,
446 (0x0e00 << 16) | (0xac78 >> 2),
447 0x00000000,
448 (0x0e00 << 16) | (0xac7c >> 2),
449 0x00000000,
450 (0x0e00 << 16) | (0xac80 >> 2),
451 0x00000000,
452 (0x0e00 << 16) | (0xac84 >> 2),
453 0x00000000,
454 (0x0e00 << 16) | (0xac88 >> 2),
455 0x00000000,
456 (0x0e00 << 16) | (0xac8c >> 2),
457 0x00000000,
458 (0x0e00 << 16) | (0x970c >> 2),
459 0x00000000,
460 (0x0e00 << 16) | (0x9714 >> 2),
461 0x00000000,
462 (0x0e00 << 16) | (0x9718 >> 2),
463 0x00000000,
464 (0x0e00 << 16) | (0x971c >> 2),
465 0x00000000,
466 (0x0e00 << 16) | (0x31068 >> 2),
467 0x00000000,
468 (0x4e00 << 16) | (0x31068 >> 2),
469 0x00000000,
470 (0x5e00 << 16) | (0x31068 >> 2),
471 0x00000000,
472 (0x6e00 << 16) | (0x31068 >> 2),
473 0x00000000,
474 (0x7e00 << 16) | (0x31068 >> 2),
475 0x00000000,
476 (0x8e00 << 16) | (0x31068 >> 2),
477 0x00000000,
478 (0x9e00 << 16) | (0x31068 >> 2),
479 0x00000000,
480 (0xae00 << 16) | (0x31068 >> 2),
481 0x00000000,
482 (0xbe00 << 16) | (0x31068 >> 2),
483 0x00000000,
484 (0x0e00 << 16) | (0xcd10 >> 2),
485 0x00000000,
486 (0x0e00 << 16) | (0xcd14 >> 2),
487 0x00000000,
488 (0x0e00 << 16) | (0x88b0 >> 2),
489 0x00000000,
490 (0x0e00 << 16) | (0x88b4 >> 2),
491 0x00000000,
492 (0x0e00 << 16) | (0x88b8 >> 2),
493 0x00000000,
494 (0x0e00 << 16) | (0x88bc >> 2),
495 0x00000000,
496 (0x0400 << 16) | (0x89c0 >> 2),
497 0x00000000,
498 (0x0e00 << 16) | (0x88c4 >> 2),
499 0x00000000,
500 (0x0e00 << 16) | (0x88c8 >> 2),
501 0x00000000,
502 (0x0e00 << 16) | (0x88d0 >> 2),
503 0x00000000,
504 (0x0e00 << 16) | (0x88d4 >> 2),
505 0x00000000,
506 (0x0e00 << 16) | (0x88d8 >> 2),
507 0x00000000,
508 (0x0e00 << 16) | (0x8980 >> 2),
509 0x00000000,
510 (0x0e00 << 16) | (0x30938 >> 2),
511 0x00000000,
512 (0x0e00 << 16) | (0x3093c >> 2),
513 0x00000000,
514 (0x0e00 << 16) | (0x30940 >> 2),
515 0x00000000,
516 (0x0e00 << 16) | (0x89a0 >> 2),
517 0x00000000,
518 (0x0e00 << 16) | (0x30900 >> 2),
519 0x00000000,
520 (0x0e00 << 16) | (0x30904 >> 2),
521 0x00000000,
522 (0x0e00 << 16) | (0x89b4 >> 2),
523 0x00000000,
524 (0x0e00 << 16) | (0x3c210 >> 2),
525 0x00000000,
526 (0x0e00 << 16) | (0x3c214 >> 2),
527 0x00000000,
528 (0x0e00 << 16) | (0x3c218 >> 2),
529 0x00000000,
530 (0x0e00 << 16) | (0x8904 >> 2),
531 0x00000000,
532 0x5,
533 (0x0e00 << 16) | (0x8c28 >> 2),
534 (0x0e00 << 16) | (0x8c2c >> 2),
535 (0x0e00 << 16) | (0x8c30 >> 2),
536 (0x0e00 << 16) | (0x8c34 >> 2),
537 (0x0e00 << 16) | (0x9600 >> 2),
538};
539
540static const u32 kalindi_rlc_save_restore_register_list[] =
541{
542 (0x0e00 << 16) | (0xc12c >> 2),
543 0x00000000,
544 (0x0e00 << 16) | (0xc140 >> 2),
545 0x00000000,
546 (0x0e00 << 16) | (0xc150 >> 2),
547 0x00000000,
548 (0x0e00 << 16) | (0xc15c >> 2),
549 0x00000000,
550 (0x0e00 << 16) | (0xc168 >> 2),
551 0x00000000,
552 (0x0e00 << 16) | (0xc170 >> 2),
553 0x00000000,
554 (0x0e00 << 16) | (0xc204 >> 2),
555 0x00000000,
556 (0x0e00 << 16) | (0xc2b4 >> 2),
557 0x00000000,
558 (0x0e00 << 16) | (0xc2b8 >> 2),
559 0x00000000,
560 (0x0e00 << 16) | (0xc2bc >> 2),
561 0x00000000,
562 (0x0e00 << 16) | (0xc2c0 >> 2),
563 0x00000000,
564 (0x0e00 << 16) | (0x8228 >> 2),
565 0x00000000,
566 (0x0e00 << 16) | (0x829c >> 2),
567 0x00000000,
568 (0x0e00 << 16) | (0x869c >> 2),
569 0x00000000,
570 (0x0600 << 16) | (0x98f4 >> 2),
571 0x00000000,
572 (0x0e00 << 16) | (0x98f8 >> 2),
573 0x00000000,
574 (0x0e00 << 16) | (0x9900 >> 2),
575 0x00000000,
576 (0x0e00 << 16) | (0xc260 >> 2),
577 0x00000000,
578 (0x0e00 << 16) | (0x90e8 >> 2),
579 0x00000000,
580 (0x0e00 << 16) | (0x3c000 >> 2),
581 0x00000000,
582 (0x0e00 << 16) | (0x3c00c >> 2),
583 0x00000000,
584 (0x0e00 << 16) | (0x8c1c >> 2),
585 0x00000000,
586 (0x0e00 << 16) | (0x9700 >> 2),
587 0x00000000,
588 (0x0e00 << 16) | (0xcd20 >> 2),
589 0x00000000,
590 (0x4e00 << 16) | (0xcd20 >> 2),
591 0x00000000,
592 (0x5e00 << 16) | (0xcd20 >> 2),
593 0x00000000,
594 (0x6e00 << 16) | (0xcd20 >> 2),
595 0x00000000,
596 (0x7e00 << 16) | (0xcd20 >> 2),
597 0x00000000,
598 (0x0e00 << 16) | (0x89bc >> 2),
599 0x00000000,
600 (0x0e00 << 16) | (0x8900 >> 2),
601 0x00000000,
602 0x3,
603 (0x0e00 << 16) | (0xc130 >> 2),
604 0x00000000,
605 (0x0e00 << 16) | (0xc134 >> 2),
606 0x00000000,
607 (0x0e00 << 16) | (0xc1fc >> 2),
608 0x00000000,
609 (0x0e00 << 16) | (0xc208 >> 2),
610 0x00000000,
611 (0x0e00 << 16) | (0xc264 >> 2),
612 0x00000000,
613 (0x0e00 << 16) | (0xc268 >> 2),
614 0x00000000,
615 (0x0e00 << 16) | (0xc26c >> 2),
616 0x00000000,
617 (0x0e00 << 16) | (0xc270 >> 2),
618 0x00000000,
619 (0x0e00 << 16) | (0xc274 >> 2),
620 0x00000000,
621 (0x0e00 << 16) | (0xc28c >> 2),
622 0x00000000,
623 (0x0e00 << 16) | (0xc290 >> 2),
624 0x00000000,
625 (0x0e00 << 16) | (0xc294 >> 2),
626 0x00000000,
627 (0x0e00 << 16) | (0xc298 >> 2),
628 0x00000000,
629 (0x0e00 << 16) | (0xc2a0 >> 2),
630 0x00000000,
631 (0x0e00 << 16) | (0xc2a4 >> 2),
632 0x00000000,
633 (0x0e00 << 16) | (0xc2a8 >> 2),
634 0x00000000,
635 (0x0e00 << 16) | (0xc2ac >> 2),
636 0x00000000,
637 (0x0e00 << 16) | (0x301d0 >> 2),
638 0x00000000,
639 (0x0e00 << 16) | (0x30238 >> 2),
640 0x00000000,
641 (0x0e00 << 16) | (0x30250 >> 2),
642 0x00000000,
643 (0x0e00 << 16) | (0x30254 >> 2),
644 0x00000000,
645 (0x0e00 << 16) | (0x30258 >> 2),
646 0x00000000,
647 (0x0e00 << 16) | (0x3025c >> 2),
648 0x00000000,
649 (0x4e00 << 16) | (0xc900 >> 2),
650 0x00000000,
651 (0x5e00 << 16) | (0xc900 >> 2),
652 0x00000000,
653 (0x6e00 << 16) | (0xc900 >> 2),
654 0x00000000,
655 (0x7e00 << 16) | (0xc900 >> 2),
656 0x00000000,
657 (0x4e00 << 16) | (0xc904 >> 2),
658 0x00000000,
659 (0x5e00 << 16) | (0xc904 >> 2),
660 0x00000000,
661 (0x6e00 << 16) | (0xc904 >> 2),
662 0x00000000,
663 (0x7e00 << 16) | (0xc904 >> 2),
664 0x00000000,
665 (0x4e00 << 16) | (0xc908 >> 2),
666 0x00000000,
667 (0x5e00 << 16) | (0xc908 >> 2),
668 0x00000000,
669 (0x6e00 << 16) | (0xc908 >> 2),
670 0x00000000,
671 (0x7e00 << 16) | (0xc908 >> 2),
672 0x00000000,
673 (0x4e00 << 16) | (0xc90c >> 2),
674 0x00000000,
675 (0x5e00 << 16) | (0xc90c >> 2),
676 0x00000000,
677 (0x6e00 << 16) | (0xc90c >> 2),
678 0x00000000,
679 (0x7e00 << 16) | (0xc90c >> 2),
680 0x00000000,
681 (0x4e00 << 16) | (0xc910 >> 2),
682 0x00000000,
683 (0x5e00 << 16) | (0xc910 >> 2),
684 0x00000000,
685 (0x6e00 << 16) | (0xc910 >> 2),
686 0x00000000,
687 (0x7e00 << 16) | (0xc910 >> 2),
688 0x00000000,
689 (0x0e00 << 16) | (0xc99c >> 2),
690 0x00000000,
691 (0x0e00 << 16) | (0x9834 >> 2),
692 0x00000000,
693 (0x0000 << 16) | (0x30f00 >> 2),
694 0x00000000,
695 (0x0000 << 16) | (0x30f04 >> 2),
696 0x00000000,
697 (0x0000 << 16) | (0x30f08 >> 2),
698 0x00000000,
699 (0x0000 << 16) | (0x30f0c >> 2),
700 0x00000000,
701 (0x0600 << 16) | (0x9b7c >> 2),
702 0x00000000,
703 (0x0e00 << 16) | (0x8a14 >> 2),
704 0x00000000,
705 (0x0e00 << 16) | (0x8a18 >> 2),
706 0x00000000,
707 (0x0600 << 16) | (0x30a00 >> 2),
708 0x00000000,
709 (0x0e00 << 16) | (0x8bf0 >> 2),
710 0x00000000,
711 (0x0e00 << 16) | (0x8bcc >> 2),
712 0x00000000,
713 (0x0e00 << 16) | (0x8b24 >> 2),
714 0x00000000,
715 (0x0e00 << 16) | (0x30a04 >> 2),
716 0x00000000,
717 (0x0600 << 16) | (0x30a10 >> 2),
718 0x00000000,
719 (0x0600 << 16) | (0x30a14 >> 2),
720 0x00000000,
721 (0x0600 << 16) | (0x30a18 >> 2),
722 0x00000000,
723 (0x0600 << 16) | (0x30a2c >> 2),
724 0x00000000,
725 (0x0e00 << 16) | (0xc700 >> 2),
726 0x00000000,
727 (0x0e00 << 16) | (0xc704 >> 2),
728 0x00000000,
729 (0x0e00 << 16) | (0xc708 >> 2),
730 0x00000000,
731 (0x0e00 << 16) | (0xc768 >> 2),
732 0x00000000,
733 (0x0400 << 16) | (0xc770 >> 2),
734 0x00000000,
735 (0x0400 << 16) | (0xc774 >> 2),
736 0x00000000,
737 (0x0400 << 16) | (0xc798 >> 2),
738 0x00000000,
739 (0x0400 << 16) | (0xc79c >> 2),
740 0x00000000,
741 (0x0e00 << 16) | (0x9100 >> 2),
742 0x00000000,
743 (0x0e00 << 16) | (0x3c010 >> 2),
744 0x00000000,
745 (0x0e00 << 16) | (0x8c00 >> 2),
746 0x00000000,
747 (0x0e00 << 16) | (0x8c04 >> 2),
748 0x00000000,
749 (0x0e00 << 16) | (0x8c20 >> 2),
750 0x00000000,
751 (0x0e00 << 16) | (0x8c38 >> 2),
752 0x00000000,
753 (0x0e00 << 16) | (0x8c3c >> 2),
754 0x00000000,
755 (0x0e00 << 16) | (0xae00 >> 2),
756 0x00000000,
757 (0x0e00 << 16) | (0x9604 >> 2),
758 0x00000000,
759 (0x0e00 << 16) | (0xac08 >> 2),
760 0x00000000,
761 (0x0e00 << 16) | (0xac0c >> 2),
762 0x00000000,
763 (0x0e00 << 16) | (0xac10 >> 2),
764 0x00000000,
765 (0x0e00 << 16) | (0xac14 >> 2),
766 0x00000000,
767 (0x0e00 << 16) | (0xac58 >> 2),
768 0x00000000,
769 (0x0e00 << 16) | (0xac68 >> 2),
770 0x00000000,
771 (0x0e00 << 16) | (0xac6c >> 2),
772 0x00000000,
773 (0x0e00 << 16) | (0xac70 >> 2),
774 0x00000000,
775 (0x0e00 << 16) | (0xac74 >> 2),
776 0x00000000,
777 (0x0e00 << 16) | (0xac78 >> 2),
778 0x00000000,
779 (0x0e00 << 16) | (0xac7c >> 2),
780 0x00000000,
781 (0x0e00 << 16) | (0xac80 >> 2),
782 0x00000000,
783 (0x0e00 << 16) | (0xac84 >> 2),
784 0x00000000,
785 (0x0e00 << 16) | (0xac88 >> 2),
786 0x00000000,
787 (0x0e00 << 16) | (0xac8c >> 2),
788 0x00000000,
789 (0x0e00 << 16) | (0x970c >> 2),
790 0x00000000,
791 (0x0e00 << 16) | (0x9714 >> 2),
792 0x00000000,
793 (0x0e00 << 16) | (0x9718 >> 2),
794 0x00000000,
795 (0x0e00 << 16) | (0x971c >> 2),
796 0x00000000,
797 (0x0e00 << 16) | (0x31068 >> 2),
798 0x00000000,
799 (0x4e00 << 16) | (0x31068 >> 2),
800 0x00000000,
801 (0x5e00 << 16) | (0x31068 >> 2),
802 0x00000000,
803 (0x6e00 << 16) | (0x31068 >> 2),
804 0x00000000,
805 (0x7e00 << 16) | (0x31068 >> 2),
806 0x00000000,
807 (0x0e00 << 16) | (0xcd10 >> 2),
808 0x00000000,
809 (0x0e00 << 16) | (0xcd14 >> 2),
810 0x00000000,
811 (0x0e00 << 16) | (0x88b0 >> 2),
812 0x00000000,
813 (0x0e00 << 16) | (0x88b4 >> 2),
814 0x00000000,
815 (0x0e00 << 16) | (0x88b8 >> 2),
816 0x00000000,
817 (0x0e00 << 16) | (0x88bc >> 2),
818 0x00000000,
819 (0x0400 << 16) | (0x89c0 >> 2),
820 0x00000000,
821 (0x0e00 << 16) | (0x88c4 >> 2),
822 0x00000000,
823 (0x0e00 << 16) | (0x88c8 >> 2),
824 0x00000000,
825 (0x0e00 << 16) | (0x88d0 >> 2),
826 0x00000000,
827 (0x0e00 << 16) | (0x88d4 >> 2),
828 0x00000000,
829 (0x0e00 << 16) | (0x88d8 >> 2),
830 0x00000000,
831 (0x0e00 << 16) | (0x8980 >> 2),
832 0x00000000,
833 (0x0e00 << 16) | (0x30938 >> 2),
834 0x00000000,
835 (0x0e00 << 16) | (0x3093c >> 2),
836 0x00000000,
837 (0x0e00 << 16) | (0x30940 >> 2),
838 0x00000000,
839 (0x0e00 << 16) | (0x89a0 >> 2),
840 0x00000000,
841 (0x0e00 << 16) | (0x30900 >> 2),
842 0x00000000,
843 (0x0e00 << 16) | (0x30904 >> 2),
844 0x00000000,
845 (0x0e00 << 16) | (0x89b4 >> 2),
846 0x00000000,
847 (0x0e00 << 16) | (0x3e1fc >> 2),
848 0x00000000,
849 (0x0e00 << 16) | (0x3c210 >> 2),
850 0x00000000,
851 (0x0e00 << 16) | (0x3c214 >> 2),
852 0x00000000,
853 (0x0e00 << 16) | (0x3c218 >> 2),
854 0x00000000,
855 (0x0e00 << 16) | (0x8904 >> 2),
856 0x00000000,
857 0x5,
858 (0x0e00 << 16) | (0x8c28 >> 2),
859 (0x0e00 << 16) | (0x8c2c >> 2),
860 (0x0e00 << 16) | (0x8c30 >> 2),
861 (0x0e00 << 16) | (0x8c34 >> 2),
862 (0x0e00 << 16) | (0x9600 >> 2),
863};
864
0aafd313
AD
865static const u32 bonaire_golden_spm_registers[] =
866{
867 0x30800, 0xe0ffffff, 0xe0000000
868};
869
870static const u32 bonaire_golden_common_registers[] =
871{
872 0xc770, 0xffffffff, 0x00000800,
873 0xc774, 0xffffffff, 0x00000800,
874 0xc798, 0xffffffff, 0x00007fbf,
875 0xc79c, 0xffffffff, 0x00007faf
876};
877
878static const u32 bonaire_golden_registers[] =
879{
880 0x3354, 0x00000333, 0x00000333,
881 0x3350, 0x000c0fc0, 0x00040200,
882 0x9a10, 0x00010000, 0x00058208,
883 0x3c000, 0xffff1fff, 0x00140000,
884 0x3c200, 0xfdfc0fff, 0x00000100,
885 0x3c234, 0x40000000, 0x40000200,
886 0x9830, 0xffffffff, 0x00000000,
887 0x9834, 0xf00fffff, 0x00000400,
888 0x9838, 0x0002021c, 0x00020200,
889 0xc78, 0x00000080, 0x00000000,
890 0x5bb0, 0x000000f0, 0x00000070,
891 0x5bc0, 0xf0311fff, 0x80300000,
892 0x98f8, 0x73773777, 0x12010001,
893 0x350c, 0x00810000, 0x408af000,
894 0x7030, 0x31000111, 0x00000011,
895 0x2f48, 0x73773777, 0x12010001,
896 0x220c, 0x00007fb6, 0x0021a1b1,
897 0x2210, 0x00007fb6, 0x002021b1,
898 0x2180, 0x00007fb6, 0x00002191,
899 0x2218, 0x00007fb6, 0x002121b1,
900 0x221c, 0x00007fb6, 0x002021b1,
901 0x21dc, 0x00007fb6, 0x00002191,
902 0x21e0, 0x00007fb6, 0x00002191,
903 0x3628, 0x0000003f, 0x0000000a,
904 0x362c, 0x0000003f, 0x0000000a,
905 0x2ae4, 0x00073ffe, 0x000022a2,
906 0x240c, 0x000007ff, 0x00000000,
907 0x8a14, 0xf000003f, 0x00000007,
908 0x8bf0, 0x00002001, 0x00000001,
909 0x8b24, 0xffffffff, 0x00ffffff,
910 0x30a04, 0x0000ff0f, 0x00000000,
911 0x28a4c, 0x07ffffff, 0x06000000,
912 0x4d8, 0x00000fff, 0x00000100,
913 0x3e78, 0x00000001, 0x00000002,
914 0x9100, 0x03000000, 0x0362c688,
915 0x8c00, 0x000000ff, 0x00000001,
916 0xe40, 0x00001fff, 0x00001fff,
917 0x9060, 0x0000007f, 0x00000020,
918 0x9508, 0x00010000, 0x00010000,
919 0xac14, 0x000003ff, 0x000000f3,
920 0xac0c, 0xffffffff, 0x00001032
921};
922
923static const u32 bonaire_mgcg_cgcg_init[] =
924{
925 0xc420, 0xffffffff, 0xfffffffc,
926 0x30800, 0xffffffff, 0xe0000000,
927 0x3c2a0, 0xffffffff, 0x00000100,
928 0x3c208, 0xffffffff, 0x00000100,
929 0x3c2c0, 0xffffffff, 0xc0000100,
930 0x3c2c8, 0xffffffff, 0xc0000100,
931 0x3c2c4, 0xffffffff, 0xc0000100,
932 0x55e4, 0xffffffff, 0x00600100,
933 0x3c280, 0xffffffff, 0x00000100,
934 0x3c214, 0xffffffff, 0x06000100,
935 0x3c220, 0xffffffff, 0x00000100,
936 0x3c218, 0xffffffff, 0x06000100,
937 0x3c204, 0xffffffff, 0x00000100,
938 0x3c2e0, 0xffffffff, 0x00000100,
939 0x3c224, 0xffffffff, 0x00000100,
940 0x3c200, 0xffffffff, 0x00000100,
941 0x3c230, 0xffffffff, 0x00000100,
942 0x3c234, 0xffffffff, 0x00000100,
943 0x3c250, 0xffffffff, 0x00000100,
944 0x3c254, 0xffffffff, 0x00000100,
945 0x3c258, 0xffffffff, 0x00000100,
946 0x3c25c, 0xffffffff, 0x00000100,
947 0x3c260, 0xffffffff, 0x00000100,
948 0x3c27c, 0xffffffff, 0x00000100,
949 0x3c278, 0xffffffff, 0x00000100,
950 0x3c210, 0xffffffff, 0x06000100,
951 0x3c290, 0xffffffff, 0x00000100,
952 0x3c274, 0xffffffff, 0x00000100,
953 0x3c2b4, 0xffffffff, 0x00000100,
954 0x3c2b0, 0xffffffff, 0x00000100,
955 0x3c270, 0xffffffff, 0x00000100,
956 0x30800, 0xffffffff, 0xe0000000,
957 0x3c020, 0xffffffff, 0x00010000,
958 0x3c024, 0xffffffff, 0x00030002,
959 0x3c028, 0xffffffff, 0x00040007,
960 0x3c02c, 0xffffffff, 0x00060005,
961 0x3c030, 0xffffffff, 0x00090008,
962 0x3c034, 0xffffffff, 0x00010000,
963 0x3c038, 0xffffffff, 0x00030002,
964 0x3c03c, 0xffffffff, 0x00040007,
965 0x3c040, 0xffffffff, 0x00060005,
966 0x3c044, 0xffffffff, 0x00090008,
967 0x3c048, 0xffffffff, 0x00010000,
968 0x3c04c, 0xffffffff, 0x00030002,
969 0x3c050, 0xffffffff, 0x00040007,
970 0x3c054, 0xffffffff, 0x00060005,
971 0x3c058, 0xffffffff, 0x00090008,
972 0x3c05c, 0xffffffff, 0x00010000,
973 0x3c060, 0xffffffff, 0x00030002,
974 0x3c064, 0xffffffff, 0x00040007,
975 0x3c068, 0xffffffff, 0x00060005,
976 0x3c06c, 0xffffffff, 0x00090008,
977 0x3c070, 0xffffffff, 0x00010000,
978 0x3c074, 0xffffffff, 0x00030002,
979 0x3c078, 0xffffffff, 0x00040007,
980 0x3c07c, 0xffffffff, 0x00060005,
981 0x3c080, 0xffffffff, 0x00090008,
982 0x3c084, 0xffffffff, 0x00010000,
983 0x3c088, 0xffffffff, 0x00030002,
984 0x3c08c, 0xffffffff, 0x00040007,
985 0x3c090, 0xffffffff, 0x00060005,
986 0x3c094, 0xffffffff, 0x00090008,
987 0x3c098, 0xffffffff, 0x00010000,
988 0x3c09c, 0xffffffff, 0x00030002,
989 0x3c0a0, 0xffffffff, 0x00040007,
990 0x3c0a4, 0xffffffff, 0x00060005,
991 0x3c0a8, 0xffffffff, 0x00090008,
992 0x3c000, 0xffffffff, 0x96e00200,
993 0x8708, 0xffffffff, 0x00900100,
994 0xc424, 0xffffffff, 0x0020003f,
995 0x38, 0xffffffff, 0x0140001c,
996 0x3c, 0x000f0000, 0x000f0000,
997 0x220, 0xffffffff, 0xC060000C,
998 0x224, 0xc0000fff, 0x00000100,
999 0xf90, 0xffffffff, 0x00000100,
1000 0xf98, 0x00000101, 0x00000000,
1001 0x20a8, 0xffffffff, 0x00000104,
1002 0x55e4, 0xff000fff, 0x00000100,
1003 0x30cc, 0xc0000fff, 0x00000104,
1004 0xc1e4, 0x00000001, 0x00000001,
1005 0xd00c, 0xff000ff0, 0x00000100,
1006 0xd80c, 0xff000ff0, 0x00000100
1007};
1008
1009static const u32 spectre_golden_spm_registers[] =
1010{
1011 0x30800, 0xe0ffffff, 0xe0000000
1012};
1013
1014static const u32 spectre_golden_common_registers[] =
1015{
1016 0xc770, 0xffffffff, 0x00000800,
1017 0xc774, 0xffffffff, 0x00000800,
1018 0xc798, 0xffffffff, 0x00007fbf,
1019 0xc79c, 0xffffffff, 0x00007faf
1020};
1021
1022static const u32 spectre_golden_registers[] =
1023{
1024 0x3c000, 0xffff1fff, 0x96940200,
1025 0x3c00c, 0xffff0001, 0xff000000,
1026 0x3c200, 0xfffc0fff, 0x00000100,
1027 0x6ed8, 0x00010101, 0x00010000,
1028 0x9834, 0xf00fffff, 0x00000400,
1029 0x9838, 0xfffffffc, 0x00020200,
1030 0x5bb0, 0x000000f0, 0x00000070,
1031 0x5bc0, 0xf0311fff, 0x80300000,
1032 0x98f8, 0x73773777, 0x12010001,
1033 0x9b7c, 0x00ff0000, 0x00fc0000,
1034 0x2f48, 0x73773777, 0x12010001,
1035 0x8a14, 0xf000003f, 0x00000007,
1036 0x8b24, 0xffffffff, 0x00ffffff,
1037 0x28350, 0x3f3f3fff, 0x00000082,
1038 0x28355, 0x0000003f, 0x00000000,
1039 0x3e78, 0x00000001, 0x00000002,
1040 0x913c, 0xffff03df, 0x00000004,
1041 0xc768, 0x00000008, 0x00000008,
1042 0x8c00, 0x000008ff, 0x00000800,
1043 0x9508, 0x00010000, 0x00010000,
1044 0xac0c, 0xffffffff, 0x54763210,
1045 0x214f8, 0x01ff01ff, 0x00000002,
1046 0x21498, 0x007ff800, 0x00200000,
1047 0x2015c, 0xffffffff, 0x00000f40,
1048 0x30934, 0xffffffff, 0x00000001
1049};
1050
1051static const u32 spectre_mgcg_cgcg_init[] =
1052{
1053 0xc420, 0xffffffff, 0xfffffffc,
1054 0x30800, 0xffffffff, 0xe0000000,
1055 0x3c2a0, 0xffffffff, 0x00000100,
1056 0x3c208, 0xffffffff, 0x00000100,
1057 0x3c2c0, 0xffffffff, 0x00000100,
1058 0x3c2c8, 0xffffffff, 0x00000100,
1059 0x3c2c4, 0xffffffff, 0x00000100,
1060 0x55e4, 0xffffffff, 0x00600100,
1061 0x3c280, 0xffffffff, 0x00000100,
1062 0x3c214, 0xffffffff, 0x06000100,
1063 0x3c220, 0xffffffff, 0x00000100,
1064 0x3c218, 0xffffffff, 0x06000100,
1065 0x3c204, 0xffffffff, 0x00000100,
1066 0x3c2e0, 0xffffffff, 0x00000100,
1067 0x3c224, 0xffffffff, 0x00000100,
1068 0x3c200, 0xffffffff, 0x00000100,
1069 0x3c230, 0xffffffff, 0x00000100,
1070 0x3c234, 0xffffffff, 0x00000100,
1071 0x3c250, 0xffffffff, 0x00000100,
1072 0x3c254, 0xffffffff, 0x00000100,
1073 0x3c258, 0xffffffff, 0x00000100,
1074 0x3c25c, 0xffffffff, 0x00000100,
1075 0x3c260, 0xffffffff, 0x00000100,
1076 0x3c27c, 0xffffffff, 0x00000100,
1077 0x3c278, 0xffffffff, 0x00000100,
1078 0x3c210, 0xffffffff, 0x06000100,
1079 0x3c290, 0xffffffff, 0x00000100,
1080 0x3c274, 0xffffffff, 0x00000100,
1081 0x3c2b4, 0xffffffff, 0x00000100,
1082 0x3c2b0, 0xffffffff, 0x00000100,
1083 0x3c270, 0xffffffff, 0x00000100,
1084 0x30800, 0xffffffff, 0xe0000000,
1085 0x3c020, 0xffffffff, 0x00010000,
1086 0x3c024, 0xffffffff, 0x00030002,
1087 0x3c028, 0xffffffff, 0x00040007,
1088 0x3c02c, 0xffffffff, 0x00060005,
1089 0x3c030, 0xffffffff, 0x00090008,
1090 0x3c034, 0xffffffff, 0x00010000,
1091 0x3c038, 0xffffffff, 0x00030002,
1092 0x3c03c, 0xffffffff, 0x00040007,
1093 0x3c040, 0xffffffff, 0x00060005,
1094 0x3c044, 0xffffffff, 0x00090008,
1095 0x3c048, 0xffffffff, 0x00010000,
1096 0x3c04c, 0xffffffff, 0x00030002,
1097 0x3c050, 0xffffffff, 0x00040007,
1098 0x3c054, 0xffffffff, 0x00060005,
1099 0x3c058, 0xffffffff, 0x00090008,
1100 0x3c05c, 0xffffffff, 0x00010000,
1101 0x3c060, 0xffffffff, 0x00030002,
1102 0x3c064, 0xffffffff, 0x00040007,
1103 0x3c068, 0xffffffff, 0x00060005,
1104 0x3c06c, 0xffffffff, 0x00090008,
1105 0x3c070, 0xffffffff, 0x00010000,
1106 0x3c074, 0xffffffff, 0x00030002,
1107 0x3c078, 0xffffffff, 0x00040007,
1108 0x3c07c, 0xffffffff, 0x00060005,
1109 0x3c080, 0xffffffff, 0x00090008,
1110 0x3c084, 0xffffffff, 0x00010000,
1111 0x3c088, 0xffffffff, 0x00030002,
1112 0x3c08c, 0xffffffff, 0x00040007,
1113 0x3c090, 0xffffffff, 0x00060005,
1114 0x3c094, 0xffffffff, 0x00090008,
1115 0x3c098, 0xffffffff, 0x00010000,
1116 0x3c09c, 0xffffffff, 0x00030002,
1117 0x3c0a0, 0xffffffff, 0x00040007,
1118 0x3c0a4, 0xffffffff, 0x00060005,
1119 0x3c0a8, 0xffffffff, 0x00090008,
1120 0x3c0ac, 0xffffffff, 0x00010000,
1121 0x3c0b0, 0xffffffff, 0x00030002,
1122 0x3c0b4, 0xffffffff, 0x00040007,
1123 0x3c0b8, 0xffffffff, 0x00060005,
1124 0x3c0bc, 0xffffffff, 0x00090008,
1125 0x3c000, 0xffffffff, 0x96e00200,
1126 0x8708, 0xffffffff, 0x00900100,
1127 0xc424, 0xffffffff, 0x0020003f,
1128 0x38, 0xffffffff, 0x0140001c,
1129 0x3c, 0x000f0000, 0x000f0000,
1130 0x220, 0xffffffff, 0xC060000C,
1131 0x224, 0xc0000fff, 0x00000100,
1132 0xf90, 0xffffffff, 0x00000100,
1133 0xf98, 0x00000101, 0x00000000,
1134 0x20a8, 0xffffffff, 0x00000104,
1135 0x55e4, 0xff000fff, 0x00000100,
1136 0x30cc, 0xc0000fff, 0x00000104,
1137 0xc1e4, 0x00000001, 0x00000001,
1138 0xd00c, 0xff000ff0, 0x00000100,
1139 0xd80c, 0xff000ff0, 0x00000100
1140};
1141
1142static const u32 kalindi_golden_spm_registers[] =
1143{
1144 0x30800, 0xe0ffffff, 0xe0000000
1145};
1146
1147static const u32 kalindi_golden_common_registers[] =
1148{
1149 0xc770, 0xffffffff, 0x00000800,
1150 0xc774, 0xffffffff, 0x00000800,
1151 0xc798, 0xffffffff, 0x00007fbf,
1152 0xc79c, 0xffffffff, 0x00007faf
1153};
1154
1155static const u32 kalindi_golden_registers[] =
1156{
1157 0x3c000, 0xffffdfff, 0x6e944040,
1158 0x55e4, 0xff607fff, 0xfc000100,
1159 0x3c220, 0xff000fff, 0x00000100,
1160 0x3c224, 0xff000fff, 0x00000100,
1161 0x3c200, 0xfffc0fff, 0x00000100,
1162 0x6ed8, 0x00010101, 0x00010000,
1163 0x9830, 0xffffffff, 0x00000000,
1164 0x9834, 0xf00fffff, 0x00000400,
1165 0x5bb0, 0x000000f0, 0x00000070,
1166 0x5bc0, 0xf0311fff, 0x80300000,
1167 0x98f8, 0x73773777, 0x12010001,
1168 0x98fc, 0xffffffff, 0x00000010,
1169 0x9b7c, 0x00ff0000, 0x00fc0000,
1170 0x8030, 0x00001f0f, 0x0000100a,
1171 0x2f48, 0x73773777, 0x12010001,
1172 0x2408, 0x000fffff, 0x000c007f,
1173 0x8a14, 0xf000003f, 0x00000007,
1174 0x8b24, 0x3fff3fff, 0x00ffcfff,
1175 0x30a04, 0x0000ff0f, 0x00000000,
1176 0x28a4c, 0x07ffffff, 0x06000000,
1177 0x4d8, 0x00000fff, 0x00000100,
1178 0x3e78, 0x00000001, 0x00000002,
1179 0xc768, 0x00000008, 0x00000008,
1180 0x8c00, 0x000000ff, 0x00000003,
1181 0x214f8, 0x01ff01ff, 0x00000002,
1182 0x21498, 0x007ff800, 0x00200000,
1183 0x2015c, 0xffffffff, 0x00000f40,
1184 0x88c4, 0x001f3ae3, 0x00000082,
1185 0x88d4, 0x0000001f, 0x00000010,
1186 0x30934, 0xffffffff, 0x00000000
1187};
1188
1189static const u32 kalindi_mgcg_cgcg_init[] =
1190{
1191 0xc420, 0xffffffff, 0xfffffffc,
1192 0x30800, 0xffffffff, 0xe0000000,
1193 0x3c2a0, 0xffffffff, 0x00000100,
1194 0x3c208, 0xffffffff, 0x00000100,
1195 0x3c2c0, 0xffffffff, 0x00000100,
1196 0x3c2c8, 0xffffffff, 0x00000100,
1197 0x3c2c4, 0xffffffff, 0x00000100,
1198 0x55e4, 0xffffffff, 0x00600100,
1199 0x3c280, 0xffffffff, 0x00000100,
1200 0x3c214, 0xffffffff, 0x06000100,
1201 0x3c220, 0xffffffff, 0x00000100,
1202 0x3c218, 0xffffffff, 0x06000100,
1203 0x3c204, 0xffffffff, 0x00000100,
1204 0x3c2e0, 0xffffffff, 0x00000100,
1205 0x3c224, 0xffffffff, 0x00000100,
1206 0x3c200, 0xffffffff, 0x00000100,
1207 0x3c230, 0xffffffff, 0x00000100,
1208 0x3c234, 0xffffffff, 0x00000100,
1209 0x3c250, 0xffffffff, 0x00000100,
1210 0x3c254, 0xffffffff, 0x00000100,
1211 0x3c258, 0xffffffff, 0x00000100,
1212 0x3c25c, 0xffffffff, 0x00000100,
1213 0x3c260, 0xffffffff, 0x00000100,
1214 0x3c27c, 0xffffffff, 0x00000100,
1215 0x3c278, 0xffffffff, 0x00000100,
1216 0x3c210, 0xffffffff, 0x06000100,
1217 0x3c290, 0xffffffff, 0x00000100,
1218 0x3c274, 0xffffffff, 0x00000100,
1219 0x3c2b4, 0xffffffff, 0x00000100,
1220 0x3c2b0, 0xffffffff, 0x00000100,
1221 0x3c270, 0xffffffff, 0x00000100,
1222 0x30800, 0xffffffff, 0xe0000000,
1223 0x3c020, 0xffffffff, 0x00010000,
1224 0x3c024, 0xffffffff, 0x00030002,
1225 0x3c028, 0xffffffff, 0x00040007,
1226 0x3c02c, 0xffffffff, 0x00060005,
1227 0x3c030, 0xffffffff, 0x00090008,
1228 0x3c034, 0xffffffff, 0x00010000,
1229 0x3c038, 0xffffffff, 0x00030002,
1230 0x3c03c, 0xffffffff, 0x00040007,
1231 0x3c040, 0xffffffff, 0x00060005,
1232 0x3c044, 0xffffffff, 0x00090008,
1233 0x3c000, 0xffffffff, 0x96e00200,
1234 0x8708, 0xffffffff, 0x00900100,
1235 0xc424, 0xffffffff, 0x0020003f,
1236 0x38, 0xffffffff, 0x0140001c,
1237 0x3c, 0x000f0000, 0x000f0000,
1238 0x220, 0xffffffff, 0xC060000C,
1239 0x224, 0xc0000fff, 0x00000100,
1240 0x20a8, 0xffffffff, 0x00000104,
1241 0x55e4, 0xff000fff, 0x00000100,
1242 0x30cc, 0xc0000fff, 0x00000104,
1243 0xc1e4, 0x00000001, 0x00000001,
1244 0xd00c, 0xff000ff0, 0x00000100,
1245 0xd80c, 0xff000ff0, 0x00000100
1246};
1247
1248static void cik_init_golden_registers(struct radeon_device *rdev)
1249{
1250 switch (rdev->family) {
1251 case CHIP_BONAIRE:
1252 radeon_program_register_sequence(rdev,
1253 bonaire_mgcg_cgcg_init,
1254 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1255 radeon_program_register_sequence(rdev,
1256 bonaire_golden_registers,
1257 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1258 radeon_program_register_sequence(rdev,
1259 bonaire_golden_common_registers,
1260 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1261 radeon_program_register_sequence(rdev,
1262 bonaire_golden_spm_registers,
1263 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1264 break;
1265 case CHIP_KABINI:
1266 radeon_program_register_sequence(rdev,
1267 kalindi_mgcg_cgcg_init,
1268 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1269 radeon_program_register_sequence(rdev,
1270 kalindi_golden_registers,
1271 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1272 radeon_program_register_sequence(rdev,
1273 kalindi_golden_common_registers,
1274 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1275 radeon_program_register_sequence(rdev,
1276 kalindi_golden_spm_registers,
1277 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1278 break;
1279 case CHIP_KAVERI:
1280 radeon_program_register_sequence(rdev,
1281 spectre_mgcg_cgcg_init,
1282 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1283 radeon_program_register_sequence(rdev,
1284 spectre_golden_registers,
1285 (const u32)ARRAY_SIZE(spectre_golden_registers));
1286 radeon_program_register_sequence(rdev,
1287 spectre_golden_common_registers,
1288 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1289 radeon_program_register_sequence(rdev,
1290 spectre_golden_spm_registers,
1291 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1292 break;
1293 default:
1294 break;
1295 }
1296}
1297
2c67912c
AD
1298/**
1299 * cik_get_xclk - get the xclk
1300 *
1301 * @rdev: radeon_device pointer
1302 *
1303 * Returns the reference clock used by the gfx engine
1304 * (CIK).
1305 */
1306u32 cik_get_xclk(struct radeon_device *rdev)
1307{
1308 u32 reference_clock = rdev->clock.spll.reference_freq;
1309
1310 if (rdev->flags & RADEON_IS_IGP) {
1311 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1312 return reference_clock / 2;
1313 } else {
1314 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1315 return reference_clock / 4;
1316 }
1317 return reference_clock;
1318}
1319
75efdee1
AD
1320/**
1321 * cik_mm_rdoorbell - read a doorbell dword
1322 *
1323 * @rdev: radeon_device pointer
1324 * @offset: byte offset into the aperture
1325 *
1326 * Returns the value in the doorbell aperture at the
1327 * requested offset (CIK).
1328 */
1329u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1330{
1331 if (offset < rdev->doorbell.size) {
1332 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1333 } else {
1334 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1335 return 0;
1336 }
1337}
1338
1339/**
1340 * cik_mm_wdoorbell - write a doorbell dword
1341 *
1342 * @rdev: radeon_device pointer
1343 * @offset: byte offset into the aperture
1344 * @v: value to write
1345 *
1346 * Writes @v to the doorbell aperture at the
1347 * requested offset (CIK).
1348 */
1349void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1350{
1351 if (offset < rdev->doorbell.size) {
1352 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1353 } else {
1354 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1355 }
1356}
1357
bc8273fe
AD
1358#define BONAIRE_IO_MC_REGS_SIZE 36
1359
1360static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1361{
1362 {0x00000070, 0x04400000},
1363 {0x00000071, 0x80c01803},
1364 {0x00000072, 0x00004004},
1365 {0x00000073, 0x00000100},
1366 {0x00000074, 0x00ff0000},
1367 {0x00000075, 0x34000000},
1368 {0x00000076, 0x08000014},
1369 {0x00000077, 0x00cc08ec},
1370 {0x00000078, 0x00000400},
1371 {0x00000079, 0x00000000},
1372 {0x0000007a, 0x04090000},
1373 {0x0000007c, 0x00000000},
1374 {0x0000007e, 0x4408a8e8},
1375 {0x0000007f, 0x00000304},
1376 {0x00000080, 0x00000000},
1377 {0x00000082, 0x00000001},
1378 {0x00000083, 0x00000002},
1379 {0x00000084, 0xf3e4f400},
1380 {0x00000085, 0x052024e3},
1381 {0x00000087, 0x00000000},
1382 {0x00000088, 0x01000000},
1383 {0x0000008a, 0x1c0a0000},
1384 {0x0000008b, 0xff010000},
1385 {0x0000008d, 0xffffefff},
1386 {0x0000008e, 0xfff3efff},
1387 {0x0000008f, 0xfff3efbf},
1388 {0x00000092, 0xf7ffffff},
1389 {0x00000093, 0xffffff7f},
1390 {0x00000095, 0x00101101},
1391 {0x00000096, 0x00000fff},
1392 {0x00000097, 0x00116fff},
1393 {0x00000098, 0x60010000},
1394 {0x00000099, 0x10010000},
1395 {0x0000009a, 0x00006000},
1396 {0x0000009b, 0x00001000},
1397 {0x0000009f, 0x00b48000}
1398};
1399
b556b12e
AD
1400/**
1401 * cik_srbm_select - select specific register instances
1402 *
1403 * @rdev: radeon_device pointer
1404 * @me: selected ME (micro engine)
1405 * @pipe: pipe
1406 * @queue: queue
1407 * @vmid: VMID
1408 *
1409 * Switches the currently active registers instances. Some
1410 * registers are instanced per VMID, others are instanced per
1411 * me/pipe/queue combination.
1412 */
1413static void cik_srbm_select(struct radeon_device *rdev,
1414 u32 me, u32 pipe, u32 queue, u32 vmid)
1415{
1416 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1417 MEID(me & 0x3) |
1418 VMID(vmid & 0xf) |
1419 QUEUEID(queue & 0x7));
1420 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1421}
1422
bc8273fe
AD
1423/* ucode loading */
1424/**
1425 * ci_mc_load_microcode - load MC ucode into the hw
1426 *
1427 * @rdev: radeon_device pointer
1428 *
1429 * Load the GDDR MC ucode into the hw (CIK).
1430 * Returns 0 on success, error on failure.
1431 */
1432static int ci_mc_load_microcode(struct radeon_device *rdev)
1433{
1434 const __be32 *fw_data;
1435 u32 running, blackout = 0;
1436 u32 *io_mc_regs;
1437 int i, ucode_size, regs_size;
1438
1439 if (!rdev->mc_fw)
1440 return -EINVAL;
1441
1442 switch (rdev->family) {
1443 case CHIP_BONAIRE:
1444 default:
1445 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1446 ucode_size = CIK_MC_UCODE_SIZE;
1447 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1448 break;
1449 }
1450
1451 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1452
1453 if (running == 0) {
1454 if (running) {
1455 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1456 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1457 }
1458
1459 /* reset the engine and set to writable */
1460 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1461 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1462
1463 /* load mc io regs */
1464 for (i = 0; i < regs_size; i++) {
1465 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1466 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1467 }
1468 /* load the MC ucode */
1469 fw_data = (const __be32 *)rdev->mc_fw->data;
1470 for (i = 0; i < ucode_size; i++)
1471 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1472
1473 /* put the engine back into the active state */
1474 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1475 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1476 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1477
1478 /* wait for training to complete */
1479 for (i = 0; i < rdev->usec_timeout; i++) {
1480 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1481 break;
1482 udelay(1);
1483 }
1484 for (i = 0; i < rdev->usec_timeout; i++) {
1485 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1486 break;
1487 udelay(1);
1488 }
1489
1490 if (running)
1491 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1492 }
1493
1494 return 0;
1495}
1496
02c81327
AD
1497/**
1498 * cik_init_microcode - load ucode images from disk
1499 *
1500 * @rdev: radeon_device pointer
1501 *
1502 * Use the firmware interface to load the ucode images into
1503 * the driver (not loaded into hw).
1504 * Returns 0 on success, error on failure.
1505 */
1506static int cik_init_microcode(struct radeon_device *rdev)
1507{
02c81327
AD
1508 const char *chip_name;
1509 size_t pfp_req_size, me_req_size, ce_req_size,
21a93e13
AD
1510 mec_req_size, rlc_req_size, mc_req_size,
1511 sdma_req_size;
02c81327
AD
1512 char fw_name[30];
1513 int err;
1514
1515 DRM_DEBUG("\n");
1516
02c81327
AD
1517 switch (rdev->family) {
1518 case CHIP_BONAIRE:
1519 chip_name = "BONAIRE";
1520 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1521 me_req_size = CIK_ME_UCODE_SIZE * 4;
1522 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1523 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1524 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1525 mc_req_size = CIK_MC_UCODE_SIZE * 4;
21a93e13 1526 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
02c81327
AD
1527 break;
1528 case CHIP_KAVERI:
1529 chip_name = "KAVERI";
1530 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1531 me_req_size = CIK_ME_UCODE_SIZE * 4;
1532 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1533 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1534 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
21a93e13 1535 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
02c81327
AD
1536 break;
1537 case CHIP_KABINI:
1538 chip_name = "KABINI";
1539 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1540 me_req_size = CIK_ME_UCODE_SIZE * 4;
1541 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1542 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1543 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
21a93e13 1544 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
02c81327
AD
1545 break;
1546 default: BUG();
1547 }
1548
1549 DRM_INFO("Loading %s Microcode\n", chip_name);
1550
1551 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
0a168933 1552 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
02c81327
AD
1553 if (err)
1554 goto out;
1555 if (rdev->pfp_fw->size != pfp_req_size) {
1556 printk(KERN_ERR
1557 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1558 rdev->pfp_fw->size, fw_name);
1559 err = -EINVAL;
1560 goto out;
1561 }
1562
1563 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
0a168933 1564 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
02c81327
AD
1565 if (err)
1566 goto out;
1567 if (rdev->me_fw->size != me_req_size) {
1568 printk(KERN_ERR
1569 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1570 rdev->me_fw->size, fw_name);
1571 err = -EINVAL;
1572 }
1573
1574 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
0a168933 1575 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
02c81327
AD
1576 if (err)
1577 goto out;
1578 if (rdev->ce_fw->size != ce_req_size) {
1579 printk(KERN_ERR
1580 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1581 rdev->ce_fw->size, fw_name);
1582 err = -EINVAL;
1583 }
1584
1585 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
0a168933 1586 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
02c81327
AD
1587 if (err)
1588 goto out;
1589 if (rdev->mec_fw->size != mec_req_size) {
1590 printk(KERN_ERR
1591 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1592 rdev->mec_fw->size, fw_name);
1593 err = -EINVAL;
1594 }
1595
1596 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
0a168933 1597 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
02c81327
AD
1598 if (err)
1599 goto out;
1600 if (rdev->rlc_fw->size != rlc_req_size) {
1601 printk(KERN_ERR
1602 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1603 rdev->rlc_fw->size, fw_name);
1604 err = -EINVAL;
1605 }
1606
21a93e13 1607 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
0a168933 1608 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
21a93e13
AD
1609 if (err)
1610 goto out;
1611 if (rdev->sdma_fw->size != sdma_req_size) {
1612 printk(KERN_ERR
1613 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1614 rdev->sdma_fw->size, fw_name);
1615 err = -EINVAL;
1616 }
1617
02c81327
AD
1618 /* No MC ucode on APUs */
1619 if (!(rdev->flags & RADEON_IS_IGP)) {
1620 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
0a168933 1621 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
02c81327
AD
1622 if (err)
1623 goto out;
1624 if (rdev->mc_fw->size != mc_req_size) {
1625 printk(KERN_ERR
1626 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1627 rdev->mc_fw->size, fw_name);
1628 err = -EINVAL;
1629 }
1630 }
1631
1632out:
02c81327
AD
1633 if (err) {
1634 if (err != -EINVAL)
1635 printk(KERN_ERR
1636 "cik_cp: Failed to load firmware \"%s\"\n",
1637 fw_name);
1638 release_firmware(rdev->pfp_fw);
1639 rdev->pfp_fw = NULL;
1640 release_firmware(rdev->me_fw);
1641 rdev->me_fw = NULL;
1642 release_firmware(rdev->ce_fw);
1643 rdev->ce_fw = NULL;
1644 release_firmware(rdev->rlc_fw);
1645 rdev->rlc_fw = NULL;
1646 release_firmware(rdev->mc_fw);
1647 rdev->mc_fw = NULL;
1648 }
1649 return err;
1650}
1651
8cc1a532
AD
1652/*
1653 * Core functions
1654 */
1655/**
1656 * cik_tiling_mode_table_init - init the hw tiling table
1657 *
1658 * @rdev: radeon_device pointer
1659 *
1660 * Starting with SI, the tiling setup is done globally in a
1661 * set of 32 tiling modes. Rather than selecting each set of
1662 * parameters per surface as on older asics, we just select
1663 * which index in the tiling table we want to use, and the
1664 * surface uses those parameters (CIK).
1665 */
1666static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1667{
1668 const u32 num_tile_mode_states = 32;
1669 const u32 num_secondary_tile_mode_states = 16;
1670 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1671 u32 num_pipe_configs;
1672 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1673 rdev->config.cik.max_shader_engines;
1674
1675 switch (rdev->config.cik.mem_row_size_in_kb) {
1676 case 1:
1677 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1678 break;
1679 case 2:
1680 default:
1681 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1682 break;
1683 case 4:
1684 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1685 break;
1686 }
1687
1688 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1689 if (num_pipe_configs > 8)
1690 num_pipe_configs = 8; /* ??? */
1691
1692 if (num_pipe_configs == 8) {
1693 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1694 switch (reg_offset) {
1695 case 0:
1696 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1697 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1698 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1699 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1700 break;
1701 case 1:
1702 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1703 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1704 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1705 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1706 break;
1707 case 2:
1708 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1709 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1710 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1711 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1712 break;
1713 case 3:
1714 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1715 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1716 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1717 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1718 break;
1719 case 4:
1720 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1721 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1722 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1723 TILE_SPLIT(split_equal_to_row_size));
1724 break;
1725 case 5:
1726 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1727 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1728 break;
1729 case 6:
1730 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1731 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1732 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1733 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1734 break;
1735 case 7:
1736 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1737 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1738 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1739 TILE_SPLIT(split_equal_to_row_size));
1740 break;
1741 case 8:
1742 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1743 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1744 break;
1745 case 9:
1746 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1747 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1748 break;
1749 case 10:
1750 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1751 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1752 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1754 break;
1755 case 11:
1756 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1757 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1758 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1760 break;
1761 case 12:
1762 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1763 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1764 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1765 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1766 break;
1767 case 13:
1768 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1769 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1770 break;
1771 case 14:
1772 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1773 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1774 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1775 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1776 break;
1777 case 16:
1778 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1779 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1780 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1781 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1782 break;
1783 case 17:
1784 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1785 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1786 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1787 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1788 break;
1789 case 27:
1790 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1791 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1792 break;
1793 case 28:
1794 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1795 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1796 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1798 break;
1799 case 29:
1800 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1801 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1802 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1803 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1804 break;
1805 case 30:
1806 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1807 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1808 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1810 break;
1811 default:
1812 gb_tile_moden = 0;
1813 break;
1814 }
39aee490 1815 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
8cc1a532
AD
1816 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1817 }
1818 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1819 switch (reg_offset) {
1820 case 0:
1821 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1822 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1823 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1824 NUM_BANKS(ADDR_SURF_16_BANK));
1825 break;
1826 case 1:
1827 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1828 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1829 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1830 NUM_BANKS(ADDR_SURF_16_BANK));
1831 break;
1832 case 2:
1833 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1834 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1835 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1836 NUM_BANKS(ADDR_SURF_16_BANK));
1837 break;
1838 case 3:
1839 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1840 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1841 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1842 NUM_BANKS(ADDR_SURF_16_BANK));
1843 break;
1844 case 4:
1845 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1846 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1847 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1848 NUM_BANKS(ADDR_SURF_8_BANK));
1849 break;
1850 case 5:
1851 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1852 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1853 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1854 NUM_BANKS(ADDR_SURF_4_BANK));
1855 break;
1856 case 6:
1857 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1858 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1859 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1860 NUM_BANKS(ADDR_SURF_2_BANK));
1861 break;
1862 case 8:
1863 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1864 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1865 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1866 NUM_BANKS(ADDR_SURF_16_BANK));
1867 break;
1868 case 9:
1869 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1870 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1871 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1872 NUM_BANKS(ADDR_SURF_16_BANK));
1873 break;
1874 case 10:
1875 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1876 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1877 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1878 NUM_BANKS(ADDR_SURF_16_BANK));
1879 break;
1880 case 11:
1881 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1882 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1883 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1884 NUM_BANKS(ADDR_SURF_16_BANK));
1885 break;
1886 case 12:
1887 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1888 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1889 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1890 NUM_BANKS(ADDR_SURF_8_BANK));
1891 break;
1892 case 13:
1893 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1894 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1895 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1896 NUM_BANKS(ADDR_SURF_4_BANK));
1897 break;
1898 case 14:
1899 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1900 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1901 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1902 NUM_BANKS(ADDR_SURF_2_BANK));
1903 break;
1904 default:
1905 gb_tile_moden = 0;
1906 break;
1907 }
1908 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1909 }
1910 } else if (num_pipe_configs == 4) {
1911 if (num_rbs == 4) {
1912 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1913 switch (reg_offset) {
1914 case 0:
1915 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1916 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1917 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1918 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1919 break;
1920 case 1:
1921 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1922 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1923 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1924 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1925 break;
1926 case 2:
1927 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1928 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1929 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1930 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1931 break;
1932 case 3:
1933 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1934 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1935 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1936 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1937 break;
1938 case 4:
1939 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1941 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1942 TILE_SPLIT(split_equal_to_row_size));
1943 break;
1944 case 5:
1945 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1946 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1947 break;
1948 case 6:
1949 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1950 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1951 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1952 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1953 break;
1954 case 7:
1955 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1956 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1957 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1958 TILE_SPLIT(split_equal_to_row_size));
1959 break;
1960 case 8:
1961 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1962 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1963 break;
1964 case 9:
1965 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1966 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1967 break;
1968 case 10:
1969 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1970 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1971 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1972 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1973 break;
1974 case 11:
1975 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1976 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1977 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1979 break;
1980 case 12:
1981 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1982 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1983 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1984 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1985 break;
1986 case 13:
1987 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1988 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1989 break;
1990 case 14:
1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1993 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1995 break;
1996 case 16:
1997 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1998 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1999 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2001 break;
2002 case 17:
2003 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2004 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2005 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2007 break;
2008 case 27:
2009 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2010 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2011 break;
2012 case 28:
2013 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2014 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2015 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2016 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2017 break;
2018 case 29:
2019 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2020 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2021 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2022 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2023 break;
2024 case 30:
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2027 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2029 break;
2030 default:
2031 gb_tile_moden = 0;
2032 break;
2033 }
39aee490 2034 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
8cc1a532
AD
2035 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2036 }
2037 } else if (num_rbs < 4) {
2038 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2039 switch (reg_offset) {
2040 case 0:
2041 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2043 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2044 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2045 break;
2046 case 1:
2047 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2049 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2050 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2051 break;
2052 case 2:
2053 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2055 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2056 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2057 break;
2058 case 3:
2059 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2061 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2063 break;
2064 case 4:
2065 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2067 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2068 TILE_SPLIT(split_equal_to_row_size));
2069 break;
2070 case 5:
2071 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2072 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2073 break;
2074 case 6:
2075 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2076 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2077 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2078 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2079 break;
2080 case 7:
2081 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2082 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2083 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2084 TILE_SPLIT(split_equal_to_row_size));
2085 break;
2086 case 8:
2087 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2088 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2089 break;
2090 case 9:
2091 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2092 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2093 break;
2094 case 10:
2095 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2097 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099 break;
2100 case 11:
2101 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2103 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2105 break;
2106 case 12:
2107 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2108 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2109 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2111 break;
2112 case 13:
2113 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2114 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2115 break;
2116 case 14:
2117 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2119 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2121 break;
2122 case 16:
2123 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2125 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2127 break;
2128 case 17:
2129 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2133 break;
2134 case 27:
2135 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2137 break;
2138 case 28:
2139 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2141 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2143 break;
2144 case 29:
2145 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2146 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2147 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 break;
2150 case 30:
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2153 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2155 break;
2156 default:
2157 gb_tile_moden = 0;
2158 break;
2159 }
39aee490 2160 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
8cc1a532
AD
2161 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2162 }
2163 }
2164 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2165 switch (reg_offset) {
2166 case 0:
2167 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2170 NUM_BANKS(ADDR_SURF_16_BANK));
2171 break;
2172 case 1:
2173 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2176 NUM_BANKS(ADDR_SURF_16_BANK));
2177 break;
2178 case 2:
2179 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2182 NUM_BANKS(ADDR_SURF_16_BANK));
2183 break;
2184 case 3:
2185 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2188 NUM_BANKS(ADDR_SURF_16_BANK));
2189 break;
2190 case 4:
2191 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2194 NUM_BANKS(ADDR_SURF_16_BANK));
2195 break;
2196 case 5:
2197 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2200 NUM_BANKS(ADDR_SURF_8_BANK));
2201 break;
2202 case 6:
2203 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206 NUM_BANKS(ADDR_SURF_4_BANK));
2207 break;
2208 case 8:
2209 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2212 NUM_BANKS(ADDR_SURF_16_BANK));
2213 break;
2214 case 9:
2215 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2218 NUM_BANKS(ADDR_SURF_16_BANK));
2219 break;
2220 case 10:
2221 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2222 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2224 NUM_BANKS(ADDR_SURF_16_BANK));
2225 break;
2226 case 11:
2227 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2228 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2229 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2230 NUM_BANKS(ADDR_SURF_16_BANK));
2231 break;
2232 case 12:
2233 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2236 NUM_BANKS(ADDR_SURF_16_BANK));
2237 break;
2238 case 13:
2239 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2240 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2241 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2242 NUM_BANKS(ADDR_SURF_8_BANK));
2243 break;
2244 case 14:
2245 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2246 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2247 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2248 NUM_BANKS(ADDR_SURF_4_BANK));
2249 break;
2250 default:
2251 gb_tile_moden = 0;
2252 break;
2253 }
2254 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2255 }
2256 } else if (num_pipe_configs == 2) {
2257 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2258 switch (reg_offset) {
2259 case 0:
2260 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2262 PIPE_CONFIG(ADDR_SURF_P2) |
2263 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2264 break;
2265 case 1:
2266 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2268 PIPE_CONFIG(ADDR_SURF_P2) |
2269 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2270 break;
2271 case 2:
2272 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2274 PIPE_CONFIG(ADDR_SURF_P2) |
2275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2276 break;
2277 case 3:
2278 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2280 PIPE_CONFIG(ADDR_SURF_P2) |
2281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2282 break;
2283 case 4:
2284 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2286 PIPE_CONFIG(ADDR_SURF_P2) |
2287 TILE_SPLIT(split_equal_to_row_size));
2288 break;
2289 case 5:
2290 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2292 break;
2293 case 6:
2294 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2295 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2296 PIPE_CONFIG(ADDR_SURF_P2) |
2297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2298 break;
2299 case 7:
2300 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2302 PIPE_CONFIG(ADDR_SURF_P2) |
2303 TILE_SPLIT(split_equal_to_row_size));
2304 break;
2305 case 8:
2306 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2307 break;
2308 case 9:
2309 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2311 break;
2312 case 10:
2313 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317 break;
2318 case 11:
2319 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2321 PIPE_CONFIG(ADDR_SURF_P2) |
2322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2323 break;
2324 case 12:
2325 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329 break;
2330 case 13:
2331 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2333 break;
2334 case 14:
2335 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2337 PIPE_CONFIG(ADDR_SURF_P2) |
2338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339 break;
2340 case 16:
2341 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343 PIPE_CONFIG(ADDR_SURF_P2) |
2344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345 break;
2346 case 17:
2347 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2348 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2349 PIPE_CONFIG(ADDR_SURF_P2) |
2350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2351 break;
2352 case 27:
2353 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2355 break;
2356 case 28:
2357 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2359 PIPE_CONFIG(ADDR_SURF_P2) |
2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361 break;
2362 case 29:
2363 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2364 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2365 PIPE_CONFIG(ADDR_SURF_P2) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 break;
2368 case 30:
2369 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2371 PIPE_CONFIG(ADDR_SURF_P2) |
2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 break;
2374 default:
2375 gb_tile_moden = 0;
2376 break;
2377 }
39aee490 2378 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
8cc1a532
AD
2379 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2380 }
2381 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2382 switch (reg_offset) {
2383 case 0:
2384 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2387 NUM_BANKS(ADDR_SURF_16_BANK));
2388 break;
2389 case 1:
2390 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2393 NUM_BANKS(ADDR_SURF_16_BANK));
2394 break;
2395 case 2:
2396 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2399 NUM_BANKS(ADDR_SURF_16_BANK));
2400 break;
2401 case 3:
2402 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2405 NUM_BANKS(ADDR_SURF_16_BANK));
2406 break;
2407 case 4:
2408 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2411 NUM_BANKS(ADDR_SURF_16_BANK));
2412 break;
2413 case 5:
2414 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2417 NUM_BANKS(ADDR_SURF_16_BANK));
2418 break;
2419 case 6:
2420 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423 NUM_BANKS(ADDR_SURF_8_BANK));
2424 break;
2425 case 8:
2426 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2429 NUM_BANKS(ADDR_SURF_16_BANK));
2430 break;
2431 case 9:
2432 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2435 NUM_BANKS(ADDR_SURF_16_BANK));
2436 break;
2437 case 10:
2438 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2441 NUM_BANKS(ADDR_SURF_16_BANK));
2442 break;
2443 case 11:
2444 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2447 NUM_BANKS(ADDR_SURF_16_BANK));
2448 break;
2449 case 12:
2450 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2453 NUM_BANKS(ADDR_SURF_16_BANK));
2454 break;
2455 case 13:
2456 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2459 NUM_BANKS(ADDR_SURF_16_BANK));
2460 break;
2461 case 14:
2462 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465 NUM_BANKS(ADDR_SURF_8_BANK));
2466 break;
2467 default:
2468 gb_tile_moden = 0;
2469 break;
2470 }
2471 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2472 }
2473 } else
2474 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2475}
2476
2477/**
2478 * cik_select_se_sh - select which SE, SH to address
2479 *
2480 * @rdev: radeon_device pointer
2481 * @se_num: shader engine to address
2482 * @sh_num: sh block to address
2483 *
2484 * Select which SE, SH combinations to address. Certain
2485 * registers are instanced per SE or SH. 0xffffffff means
2486 * broadcast to all SEs or SHs (CIK).
2487 */
2488static void cik_select_se_sh(struct radeon_device *rdev,
2489 u32 se_num, u32 sh_num)
2490{
2491 u32 data = INSTANCE_BROADCAST_WRITES;
2492
2493 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
b0fe3d39 2494 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
8cc1a532
AD
2495 else if (se_num == 0xffffffff)
2496 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2497 else if (sh_num == 0xffffffff)
2498 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2499 else
2500 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2501 WREG32(GRBM_GFX_INDEX, data);
2502}
2503
2504/**
2505 * cik_create_bitmask - create a bitmask
2506 *
2507 * @bit_width: length of the mask
2508 *
2509 * create a variable length bit mask (CIK).
2510 * Returns the bitmask.
2511 */
2512static u32 cik_create_bitmask(u32 bit_width)
2513{
2514 u32 i, mask = 0;
2515
2516 for (i = 0; i < bit_width; i++) {
2517 mask <<= 1;
2518 mask |= 1;
2519 }
2520 return mask;
2521}
2522
2523/**
2524 * cik_select_se_sh - select which SE, SH to address
2525 *
2526 * @rdev: radeon_device pointer
2527 * @max_rb_num: max RBs (render backends) for the asic
2528 * @se_num: number of SEs (shader engines) for the asic
2529 * @sh_per_se: number of SH blocks per SE for the asic
2530 *
2531 * Calculates the bitmask of disabled RBs (CIK).
2532 * Returns the disabled RB bitmask.
2533 */
2534static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2535 u32 max_rb_num, u32 se_num,
2536 u32 sh_per_se)
2537{
2538 u32 data, mask;
2539
2540 data = RREG32(CC_RB_BACKEND_DISABLE);
2541 if (data & 1)
2542 data &= BACKEND_DISABLE_MASK;
2543 else
2544 data = 0;
2545 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2546
2547 data >>= BACKEND_DISABLE_SHIFT;
2548
2549 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2550
2551 return data & mask;
2552}
2553
2554/**
2555 * cik_setup_rb - setup the RBs on the asic
2556 *
2557 * @rdev: radeon_device pointer
2558 * @se_num: number of SEs (shader engines) for the asic
2559 * @sh_per_se: number of SH blocks per SE for the asic
2560 * @max_rb_num: max RBs (render backends) for the asic
2561 *
2562 * Configures per-SE/SH RB registers (CIK).
2563 */
2564static void cik_setup_rb(struct radeon_device *rdev,
2565 u32 se_num, u32 sh_per_se,
2566 u32 max_rb_num)
2567{
2568 int i, j;
2569 u32 data, mask;
2570 u32 disabled_rbs = 0;
2571 u32 enabled_rbs = 0;
2572
2573 for (i = 0; i < se_num; i++) {
2574 for (j = 0; j < sh_per_se; j++) {
2575 cik_select_se_sh(rdev, i, j);
2576 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2577 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2578 }
2579 }
2580 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2581
2582 mask = 1;
2583 for (i = 0; i < max_rb_num; i++) {
2584 if (!(disabled_rbs & mask))
2585 enabled_rbs |= mask;
2586 mask <<= 1;
2587 }
2588
2589 for (i = 0; i < se_num; i++) {
2590 cik_select_se_sh(rdev, i, 0xffffffff);
2591 data = 0;
2592 for (j = 0; j < sh_per_se; j++) {
2593 switch (enabled_rbs & 3) {
2594 case 1:
2595 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2596 break;
2597 case 2:
2598 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2599 break;
2600 case 3:
2601 default:
2602 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2603 break;
2604 }
2605 enabled_rbs >>= 2;
2606 }
2607 WREG32(PA_SC_RASTER_CONFIG, data);
2608 }
2609 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2610}
2611
2612/**
2613 * cik_gpu_init - setup the 3D engine
2614 *
2615 * @rdev: radeon_device pointer
2616 *
2617 * Configures the 3D engine and tiling configuration
2618 * registers so that the 3D engine is usable.
2619 */
2620static void cik_gpu_init(struct radeon_device *rdev)
2621{
2622 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2623 u32 mc_shared_chmap, mc_arb_ramcfg;
2624 u32 hdp_host_path_cntl;
2625 u32 tmp;
2626 int i, j;
2627
2628 switch (rdev->family) {
2629 case CHIP_BONAIRE:
2630 rdev->config.cik.max_shader_engines = 2;
2631 rdev->config.cik.max_tile_pipes = 4;
2632 rdev->config.cik.max_cu_per_sh = 7;
2633 rdev->config.cik.max_sh_per_se = 1;
2634 rdev->config.cik.max_backends_per_se = 2;
2635 rdev->config.cik.max_texture_channel_caches = 4;
2636 rdev->config.cik.max_gprs = 256;
2637 rdev->config.cik.max_gs_threads = 32;
2638 rdev->config.cik.max_hw_contexts = 8;
2639
2640 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2641 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2642 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2643 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2644 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2645 break;
2646 case CHIP_KAVERI:
2647 /* TODO */
2648 break;
2649 case CHIP_KABINI:
2650 default:
2651 rdev->config.cik.max_shader_engines = 1;
2652 rdev->config.cik.max_tile_pipes = 2;
2653 rdev->config.cik.max_cu_per_sh = 2;
2654 rdev->config.cik.max_sh_per_se = 1;
2655 rdev->config.cik.max_backends_per_se = 1;
2656 rdev->config.cik.max_texture_channel_caches = 2;
2657 rdev->config.cik.max_gprs = 256;
2658 rdev->config.cik.max_gs_threads = 16;
2659 rdev->config.cik.max_hw_contexts = 8;
2660
2661 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2662 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2663 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2664 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2665 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2666 break;
2667 }
2668
2669 /* Initialize HDP */
2670 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2671 WREG32((0x2c14 + j), 0x00000000);
2672 WREG32((0x2c18 + j), 0x00000000);
2673 WREG32((0x2c1c + j), 0x00000000);
2674 WREG32((0x2c20 + j), 0x00000000);
2675 WREG32((0x2c24 + j), 0x00000000);
2676 }
2677
2678 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2679
2680 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2681
2682 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2683 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2684
2685 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2686 rdev->config.cik.mem_max_burst_length_bytes = 256;
2687 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2688 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2689 if (rdev->config.cik.mem_row_size_in_kb > 4)
2690 rdev->config.cik.mem_row_size_in_kb = 4;
2691 /* XXX use MC settings? */
2692 rdev->config.cik.shader_engine_tile_size = 32;
2693 rdev->config.cik.num_gpus = 1;
2694 rdev->config.cik.multi_gpu_tile_size = 64;
2695
2696 /* fix up row size */
2697 gb_addr_config &= ~ROW_SIZE_MASK;
2698 switch (rdev->config.cik.mem_row_size_in_kb) {
2699 case 1:
2700 default:
2701 gb_addr_config |= ROW_SIZE(0);
2702 break;
2703 case 2:
2704 gb_addr_config |= ROW_SIZE(1);
2705 break;
2706 case 4:
2707 gb_addr_config |= ROW_SIZE(2);
2708 break;
2709 }
2710
2711 /* setup tiling info dword. gb_addr_config is not adequate since it does
2712 * not have bank info, so create a custom tiling dword.
2713 * bits 3:0 num_pipes
2714 * bits 7:4 num_banks
2715 * bits 11:8 group_size
2716 * bits 15:12 row_size
2717 */
2718 rdev->config.cik.tile_config = 0;
2719 switch (rdev->config.cik.num_tile_pipes) {
2720 case 1:
2721 rdev->config.cik.tile_config |= (0 << 0);
2722 break;
2723 case 2:
2724 rdev->config.cik.tile_config |= (1 << 0);
2725 break;
2726 case 4:
2727 rdev->config.cik.tile_config |= (2 << 0);
2728 break;
2729 case 8:
2730 default:
2731 /* XXX what about 12? */
2732 rdev->config.cik.tile_config |= (3 << 0);
2733 break;
2734 }
2735 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2736 rdev->config.cik.tile_config |= 1 << 4;
2737 else
2738 rdev->config.cik.tile_config |= 0 << 4;
2739 rdev->config.cik.tile_config |=
2740 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2741 rdev->config.cik.tile_config |=
2742 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2743
2744 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2745 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2746 WREG32(DMIF_ADDR_CALC, gb_addr_config);
21a93e13
AD
2747 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2748 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
87167bb1
CK
2749 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2750 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2751 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
8cc1a532
AD
2752
2753 cik_tiling_mode_table_init(rdev);
2754
2755 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2756 rdev->config.cik.max_sh_per_se,
2757 rdev->config.cik.max_backends_per_se);
2758
2759 /* set HW defaults for 3D engine */
2760 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2761
2762 WREG32(SX_DEBUG_1, 0x20);
2763
2764 WREG32(TA_CNTL_AUX, 0x00010000);
2765
2766 tmp = RREG32(SPI_CONFIG_CNTL);
2767 tmp |= 0x03000000;
2768 WREG32(SPI_CONFIG_CNTL, tmp);
2769
2770 WREG32(SQ_CONFIG, 1);
2771
2772 WREG32(DB_DEBUG, 0);
2773
2774 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2775 tmp |= 0x00000400;
2776 WREG32(DB_DEBUG2, tmp);
2777
2778 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2779 tmp |= 0x00020200;
2780 WREG32(DB_DEBUG3, tmp);
2781
2782 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2783 tmp |= 0x00018208;
2784 WREG32(CB_HW_CONTROL, tmp);
2785
2786 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2787
2788 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2789 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2790 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2791 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2792
2793 WREG32(VGT_NUM_INSTANCES, 1);
2794
2795 WREG32(CP_PERFMON_CNTL, 0);
2796
2797 WREG32(SQ_CONFIG, 0);
2798
2799 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2800 FORCE_EOV_MAX_REZ_CNT(255)));
2801
2802 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2803 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2804
2805 WREG32(VGT_GS_VERTEX_REUSE, 16);
2806 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2807
2808 tmp = RREG32(HDP_MISC_CNTL);
2809 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2810 WREG32(HDP_MISC_CNTL, tmp);
2811
2812 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2813 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2814
2815 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2816 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2817
2818 udelay(50);
2819}
2820
2cae3bc3
AD
2821/*
2822 * GPU scratch registers helpers function.
2823 */
2824/**
2825 * cik_scratch_init - setup driver info for CP scratch regs
2826 *
2827 * @rdev: radeon_device pointer
2828 *
2829 * Set up the number and offset of the CP scratch registers.
2830 * NOTE: use of CP scratch registers is a legacy inferface and
2831 * is not used by default on newer asics (r6xx+). On newer asics,
2832 * memory buffers are used for fences rather than scratch regs.
2833 */
2834static void cik_scratch_init(struct radeon_device *rdev)
2835{
2836 int i;
2837
2838 rdev->scratch.num_reg = 7;
2839 rdev->scratch.reg_base = SCRATCH_REG0;
2840 for (i = 0; i < rdev->scratch.num_reg; i++) {
2841 rdev->scratch.free[i] = true;
2842 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2843 }
2844}
2845
fbc832c7
AD
2846/**
2847 * cik_ring_test - basic gfx ring test
2848 *
2849 * @rdev: radeon_device pointer
2850 * @ring: radeon_ring structure holding ring information
2851 *
2852 * Allocate a scratch register and write to it using the gfx ring (CIK).
2853 * Provides a basic gfx ring test to verify that the ring is working.
2854 * Used by cik_cp_gfx_resume();
2855 * Returns 0 on success, error on failure.
2856 */
2857int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2858{
2859 uint32_t scratch;
2860 uint32_t tmp = 0;
2861 unsigned i;
2862 int r;
2863
2864 r = radeon_scratch_get(rdev, &scratch);
2865 if (r) {
2866 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2867 return r;
2868 }
2869 WREG32(scratch, 0xCAFEDEAD);
2870 r = radeon_ring_lock(rdev, ring, 3);
2871 if (r) {
2872 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2873 radeon_scratch_free(rdev, scratch);
2874 return r;
2875 }
2876 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2877 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2878 radeon_ring_write(ring, 0xDEADBEEF);
2879 radeon_ring_unlock_commit(rdev, ring);
963e81f9 2880
fbc832c7
AD
2881 for (i = 0; i < rdev->usec_timeout; i++) {
2882 tmp = RREG32(scratch);
2883 if (tmp == 0xDEADBEEF)
2884 break;
2885 DRM_UDELAY(1);
2886 }
2887 if (i < rdev->usec_timeout) {
2888 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2889 } else {
2890 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2891 ring->idx, scratch, tmp);
2892 r = -EINVAL;
2893 }
2894 radeon_scratch_free(rdev, scratch);
2895 return r;
2896}
2897
2cae3bc3 2898/**
b07fdd38 2899 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2cae3bc3
AD
2900 *
2901 * @rdev: radeon_device pointer
2902 * @fence: radeon fence object
2903 *
2904 * Emits a fence sequnce number on the gfx ring and flushes
2905 * GPU caches.
2906 */
b07fdd38
AD
2907void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2908 struct radeon_fence *fence)
2cae3bc3
AD
2909{
2910 struct radeon_ring *ring = &rdev->ring[fence->ring];
2911 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2912
2913 /* EVENT_WRITE_EOP - flush caches, send int */
2914 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2915 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2916 EOP_TC_ACTION_EN |
2917 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2918 EVENT_INDEX(5)));
2919 radeon_ring_write(ring, addr & 0xfffffffc);
2920 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2921 radeon_ring_write(ring, fence->seq);
2922 radeon_ring_write(ring, 0);
2923 /* HDP flush */
2924 /* We should be using the new WAIT_REG_MEM special op packet here
2925 * but it causes the CP to hang
2926 */
2927 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2928 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2929 WRITE_DATA_DST_SEL(0)));
2930 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2931 radeon_ring_write(ring, 0);
2932 radeon_ring_write(ring, 0);
2933}
2934
b07fdd38
AD
2935/**
2936 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2937 *
2938 * @rdev: radeon_device pointer
2939 * @fence: radeon fence object
2940 *
2941 * Emits a fence sequnce number on the compute ring and flushes
2942 * GPU caches.
2943 */
2944void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2945 struct radeon_fence *fence)
2946{
2947 struct radeon_ring *ring = &rdev->ring[fence->ring];
2948 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2949
2950 /* RELEASE_MEM - flush caches, send int */
2951 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2952 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2953 EOP_TC_ACTION_EN |
2954 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2955 EVENT_INDEX(5)));
2956 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2957 radeon_ring_write(ring, addr & 0xfffffffc);
2958 radeon_ring_write(ring, upper_32_bits(addr));
2959 radeon_ring_write(ring, fence->seq);
2960 radeon_ring_write(ring, 0);
2961 /* HDP flush */
2962 /* We should be using the new WAIT_REG_MEM special op packet here
2963 * but it causes the CP to hang
2964 */
2965 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2966 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2967 WRITE_DATA_DST_SEL(0)));
2968 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2969 radeon_ring_write(ring, 0);
2970 radeon_ring_write(ring, 0);
2971}
2972
2cae3bc3
AD
2973void cik_semaphore_ring_emit(struct radeon_device *rdev,
2974 struct radeon_ring *ring,
2975 struct radeon_semaphore *semaphore,
2976 bool emit_wait)
2977{
2978 uint64_t addr = semaphore->gpu_addr;
2979 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2980
2981 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2982 radeon_ring_write(ring, addr & 0xffffffff);
2983 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2984}
2985
2986/*
2987 * IB stuff
2988 */
2989/**
2990 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2991 *
2992 * @rdev: radeon_device pointer
2993 * @ib: radeon indirect buffer object
2994 *
2995 * Emits an DE (drawing engine) or CE (constant engine) IB
2996 * on the gfx ring. IBs are usually generated by userspace
2997 * acceleration drivers and submitted to the kernel for
2998 * sheduling on the ring. This function schedules the IB
2999 * on the gfx ring for execution by the GPU.
3000 */
3001void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3002{
3003 struct radeon_ring *ring = &rdev->ring[ib->ring];
3004 u32 header, control = INDIRECT_BUFFER_VALID;
3005
3006 if (ib->is_const_ib) {
3007 /* set switch buffer packet before const IB */
3008 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3009 radeon_ring_write(ring, 0);
3010
3011 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3012 } else {
3013 u32 next_rptr;
3014 if (ring->rptr_save_reg) {
3015 next_rptr = ring->wptr + 3 + 4;
3016 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3017 radeon_ring_write(ring, ((ring->rptr_save_reg -
3018 PACKET3_SET_UCONFIG_REG_START) >> 2));
3019 radeon_ring_write(ring, next_rptr);
3020 } else if (rdev->wb.enabled) {
3021 next_rptr = ring->wptr + 5 + 4;
3022 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3023 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3024 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3025 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3026 radeon_ring_write(ring, next_rptr);
3027 }
3028
3029 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3030 }
3031
3032 control |= ib->length_dw |
3033 (ib->vm ? (ib->vm->id << 24) : 0);
3034
3035 radeon_ring_write(ring, header);
3036 radeon_ring_write(ring,
3037#ifdef __BIG_ENDIAN
3038 (2 << 0) |
3039#endif
3040 (ib->gpu_addr & 0xFFFFFFFC));
3041 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3042 radeon_ring_write(ring, control);
3043}
3044
fbc832c7
AD
3045/**
3046 * cik_ib_test - basic gfx ring IB test
3047 *
3048 * @rdev: radeon_device pointer
3049 * @ring: radeon_ring structure holding ring information
3050 *
3051 * Allocate an IB and execute it on the gfx ring (CIK).
3052 * Provides a basic gfx ring test to verify that IBs are working.
3053 * Returns 0 on success, error on failure.
3054 */
3055int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3056{
3057 struct radeon_ib ib;
3058 uint32_t scratch;
3059 uint32_t tmp = 0;
3060 unsigned i;
3061 int r;
3062
3063 r = radeon_scratch_get(rdev, &scratch);
3064 if (r) {
3065 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3066 return r;
3067 }
3068 WREG32(scratch, 0xCAFEDEAD);
3069 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3070 if (r) {
3071 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3072 return r;
3073 }
3074 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3075 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3076 ib.ptr[2] = 0xDEADBEEF;
3077 ib.length_dw = 3;
3078 r = radeon_ib_schedule(rdev, &ib, NULL);
3079 if (r) {
3080 radeon_scratch_free(rdev, scratch);
3081 radeon_ib_free(rdev, &ib);
3082 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3083 return r;
3084 }
3085 r = radeon_fence_wait(ib.fence, false);
3086 if (r) {
3087 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3088 return r;
3089 }
3090 for (i = 0; i < rdev->usec_timeout; i++) {
3091 tmp = RREG32(scratch);
3092 if (tmp == 0xDEADBEEF)
3093 break;
3094 DRM_UDELAY(1);
3095 }
3096 if (i < rdev->usec_timeout) {
3097 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3098 } else {
3099 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3100 scratch, tmp);
3101 r = -EINVAL;
3102 }
3103 radeon_scratch_free(rdev, scratch);
3104 radeon_ib_free(rdev, &ib);
3105 return r;
3106}
3107
841cf442
AD
3108/*
3109 * CP.
3110 * On CIK, gfx and compute now have independant command processors.
3111 *
3112 * GFX
3113 * Gfx consists of a single ring and can process both gfx jobs and
3114 * compute jobs. The gfx CP consists of three microengines (ME):
3115 * PFP - Pre-Fetch Parser
3116 * ME - Micro Engine
3117 * CE - Constant Engine
3118 * The PFP and ME make up what is considered the Drawing Engine (DE).
3119 * The CE is an asynchronous engine used for updating buffer desciptors
3120 * used by the DE so that they can be loaded into cache in parallel
3121 * while the DE is processing state update packets.
3122 *
3123 * Compute
3124 * The compute CP consists of two microengines (ME):
3125 * MEC1 - Compute MicroEngine 1
3126 * MEC2 - Compute MicroEngine 2
3127 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3128 * The queues are exposed to userspace and are programmed directly
3129 * by the compute runtime.
3130 */
3131/**
3132 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3133 *
3134 * @rdev: radeon_device pointer
3135 * @enable: enable or disable the MEs
3136 *
3137 * Halts or unhalts the gfx MEs.
3138 */
3139static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3140{
3141 if (enable)
3142 WREG32(CP_ME_CNTL, 0);
3143 else {
3144 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3145 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3146 }
3147 udelay(50);
3148}
3149
3150/**
3151 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3152 *
3153 * @rdev: radeon_device pointer
3154 *
3155 * Loads the gfx PFP, ME, and CE ucode.
3156 * Returns 0 for success, -EINVAL if the ucode is not available.
3157 */
3158static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3159{
3160 const __be32 *fw_data;
3161 int i;
3162
3163 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3164 return -EINVAL;
3165
3166 cik_cp_gfx_enable(rdev, false);
3167
3168 /* PFP */
3169 fw_data = (const __be32 *)rdev->pfp_fw->data;
3170 WREG32(CP_PFP_UCODE_ADDR, 0);
3171 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3172 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3173 WREG32(CP_PFP_UCODE_ADDR, 0);
3174
3175 /* CE */
3176 fw_data = (const __be32 *)rdev->ce_fw->data;
3177 WREG32(CP_CE_UCODE_ADDR, 0);
3178 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3179 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3180 WREG32(CP_CE_UCODE_ADDR, 0);
3181
3182 /* ME */
3183 fw_data = (const __be32 *)rdev->me_fw->data;
3184 WREG32(CP_ME_RAM_WADDR, 0);
3185 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3186 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3187 WREG32(CP_ME_RAM_WADDR, 0);
3188
3189 WREG32(CP_PFP_UCODE_ADDR, 0);
3190 WREG32(CP_CE_UCODE_ADDR, 0);
3191 WREG32(CP_ME_RAM_WADDR, 0);
3192 WREG32(CP_ME_RAM_RADDR, 0);
3193 return 0;
3194}
3195
3196/**
3197 * cik_cp_gfx_start - start the gfx ring
3198 *
3199 * @rdev: radeon_device pointer
3200 *
3201 * Enables the ring and loads the clear state context and other
3202 * packets required to init the ring.
3203 * Returns 0 for success, error for failure.
3204 */
3205static int cik_cp_gfx_start(struct radeon_device *rdev)
3206{
3207 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3208 int r, i;
3209
3210 /* init the CP */
3211 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3212 WREG32(CP_ENDIAN_SWAP, 0);
3213 WREG32(CP_DEVICE_ID, 1);
3214
3215 cik_cp_gfx_enable(rdev, true);
3216
3217 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3218 if (r) {
3219 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3220 return r;
3221 }
3222
3223 /* init the CE partitions. CE only used for gfx on CIK */
3224 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3225 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3226 radeon_ring_write(ring, 0xc000);
3227 radeon_ring_write(ring, 0xc000);
3228
3229 /* setup clear context state */
3230 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3231 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3232
3233 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3234 radeon_ring_write(ring, 0x80000000);
3235 radeon_ring_write(ring, 0x80000000);
3236
3237 for (i = 0; i < cik_default_size; i++)
3238 radeon_ring_write(ring, cik_default_state[i]);
3239
3240 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3241 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3242
3243 /* set clear context state */
3244 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3245 radeon_ring_write(ring, 0);
3246
3247 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3248 radeon_ring_write(ring, 0x00000316);
3249 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3250 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3251
3252 radeon_ring_unlock_commit(rdev, ring);
3253
3254 return 0;
3255}
3256
3257/**
3258 * cik_cp_gfx_fini - stop the gfx ring
3259 *
3260 * @rdev: radeon_device pointer
3261 *
3262 * Stop the gfx ring and tear down the driver ring
3263 * info.
3264 */
3265static void cik_cp_gfx_fini(struct radeon_device *rdev)
3266{
3267 cik_cp_gfx_enable(rdev, false);
3268 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3269}
3270
3271/**
3272 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3273 *
3274 * @rdev: radeon_device pointer
3275 *
3276 * Program the location and size of the gfx ring buffer
3277 * and test it to make sure it's working.
3278 * Returns 0 for success, error for failure.
3279 */
3280static int cik_cp_gfx_resume(struct radeon_device *rdev)
3281{
3282 struct radeon_ring *ring;
3283 u32 tmp;
3284 u32 rb_bufsz;
3285 u64 rb_addr;
3286 int r;
3287
3288 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3289 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3290
3291 /* Set the write pointer delay */
3292 WREG32(CP_RB_WPTR_DELAY, 0);
3293
3294 /* set the RB to use vmid 0 */
3295 WREG32(CP_RB_VMID, 0);
3296
3297 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3298
3299 /* ring 0 - compute and gfx */
3300 /* Set ring buffer size */
3301 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3302 rb_bufsz = drm_order(ring->ring_size / 8);
3303 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3304#ifdef __BIG_ENDIAN
3305 tmp |= BUF_SWAP_32BIT;
3306#endif
3307 WREG32(CP_RB0_CNTL, tmp);
3308
3309 /* Initialize the ring buffer's read and write pointers */
3310 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3311 ring->wptr = 0;
3312 WREG32(CP_RB0_WPTR, ring->wptr);
3313
3314 /* set the wb address wether it's enabled or not */
3315 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3316 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3317
3318 /* scratch register shadowing is no longer supported */
3319 WREG32(SCRATCH_UMSK, 0);
3320
3321 if (!rdev->wb.enabled)
3322 tmp |= RB_NO_UPDATE;
3323
3324 mdelay(1);
3325 WREG32(CP_RB0_CNTL, tmp);
3326
3327 rb_addr = ring->gpu_addr >> 8;
3328 WREG32(CP_RB0_BASE, rb_addr);
3329 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3330
3331 ring->rptr = RREG32(CP_RB0_RPTR);
3332
3333 /* start the ring */
3334 cik_cp_gfx_start(rdev);
3335 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3336 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3337 if (r) {
3338 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3339 return r;
3340 }
3341 return 0;
3342}
3343
963e81f9
AD
3344u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3345 struct radeon_ring *ring)
3346{
3347 u32 rptr;
3348
3349
3350
3351 if (rdev->wb.enabled) {
3352 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3353 } else {
f61d5b46 3354 mutex_lock(&rdev->srbm_mutex);
963e81f9
AD
3355 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3356 rptr = RREG32(CP_HQD_PQ_RPTR);
3357 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 3358 mutex_unlock(&rdev->srbm_mutex);
963e81f9
AD
3359 }
3360 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3361
3362 return rptr;
3363}
3364
3365u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3366 struct radeon_ring *ring)
3367{
3368 u32 wptr;
3369
3370 if (rdev->wb.enabled) {
3371 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3372 } else {
f61d5b46 3373 mutex_lock(&rdev->srbm_mutex);
963e81f9
AD
3374 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3375 wptr = RREG32(CP_HQD_PQ_WPTR);
3376 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 3377 mutex_unlock(&rdev->srbm_mutex);
963e81f9
AD
3378 }
3379 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3380
3381 return wptr;
3382}
3383
3384void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3385 struct radeon_ring *ring)
3386{
3387 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
3388
3389 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
3390 WDOORBELL32(ring->doorbell_offset, wptr);
3391}
3392
841cf442
AD
3393/**
3394 * cik_cp_compute_enable - enable/disable the compute CP MEs
3395 *
3396 * @rdev: radeon_device pointer
3397 * @enable: enable or disable the MEs
3398 *
3399 * Halts or unhalts the compute MEs.
3400 */
3401static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3402{
3403 if (enable)
3404 WREG32(CP_MEC_CNTL, 0);
3405 else
3406 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3407 udelay(50);
3408}
3409
3410/**
3411 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3412 *
3413 * @rdev: radeon_device pointer
3414 *
3415 * Loads the compute MEC1&2 ucode.
3416 * Returns 0 for success, -EINVAL if the ucode is not available.
3417 */
3418static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3419{
3420 const __be32 *fw_data;
3421 int i;
3422
3423 if (!rdev->mec_fw)
3424 return -EINVAL;
3425
3426 cik_cp_compute_enable(rdev, false);
3427
3428 /* MEC1 */
3429 fw_data = (const __be32 *)rdev->mec_fw->data;
3430 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3431 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3432 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3433 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3434
3435 if (rdev->family == CHIP_KAVERI) {
3436 /* MEC2 */
3437 fw_data = (const __be32 *)rdev->mec_fw->data;
3438 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3439 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3440 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3441 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3442 }
3443
3444 return 0;
3445}
3446
3447/**
3448 * cik_cp_compute_start - start the compute queues
3449 *
3450 * @rdev: radeon_device pointer
3451 *
3452 * Enable the compute queues.
3453 * Returns 0 for success, error for failure.
3454 */
3455static int cik_cp_compute_start(struct radeon_device *rdev)
3456{
963e81f9
AD
3457 cik_cp_compute_enable(rdev, true);
3458
841cf442
AD
3459 return 0;
3460}
3461
3462/**
3463 * cik_cp_compute_fini - stop the compute queues
3464 *
3465 * @rdev: radeon_device pointer
3466 *
3467 * Stop the compute queues and tear down the driver queue
3468 * info.
3469 */
3470static void cik_cp_compute_fini(struct radeon_device *rdev)
3471{
963e81f9
AD
3472 int i, idx, r;
3473
841cf442 3474 cik_cp_compute_enable(rdev, false);
963e81f9
AD
3475
3476 for (i = 0; i < 2; i++) {
3477 if (i == 0)
3478 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3479 else
3480 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3481
3482 if (rdev->ring[idx].mqd_obj) {
3483 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3484 if (unlikely(r != 0))
3485 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3486
3487 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3488 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3489
3490 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3491 rdev->ring[idx].mqd_obj = NULL;
3492 }
3493 }
841cf442
AD
3494}
3495
963e81f9
AD
3496static void cik_mec_fini(struct radeon_device *rdev)
3497{
3498 int r;
3499
3500 if (rdev->mec.hpd_eop_obj) {
3501 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3502 if (unlikely(r != 0))
3503 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3504 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3505 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3506
3507 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3508 rdev->mec.hpd_eop_obj = NULL;
3509 }
3510}
3511
3512#define MEC_HPD_SIZE 2048
3513
3514static int cik_mec_init(struct radeon_device *rdev)
3515{
3516 int r;
3517 u32 *hpd;
3518
3519 /*
3520 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3521 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3522 */
3523 if (rdev->family == CHIP_KAVERI)
3524 rdev->mec.num_mec = 2;
3525 else
3526 rdev->mec.num_mec = 1;
3527 rdev->mec.num_pipe = 4;
3528 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3529
3530 if (rdev->mec.hpd_eop_obj == NULL) {
3531 r = radeon_bo_create(rdev,
3532 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3533 PAGE_SIZE, true,
3534 RADEON_GEM_DOMAIN_GTT, NULL,
3535 &rdev->mec.hpd_eop_obj);
3536 if (r) {
3537 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3538 return r;
3539 }
3540 }
3541
3542 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3543 if (unlikely(r != 0)) {
3544 cik_mec_fini(rdev);
3545 return r;
3546 }
3547 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3548 &rdev->mec.hpd_eop_gpu_addr);
3549 if (r) {
3550 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3551 cik_mec_fini(rdev);
3552 return r;
3553 }
3554 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3555 if (r) {
3556 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3557 cik_mec_fini(rdev);
3558 return r;
3559 }
3560
3561 /* clear memory. Not sure if this is required or not */
3562 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3563
3564 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3565 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3566
3567 return 0;
3568}
3569
3570struct hqd_registers
3571{
3572 u32 cp_mqd_base_addr;
3573 u32 cp_mqd_base_addr_hi;
3574 u32 cp_hqd_active;
3575 u32 cp_hqd_vmid;
3576 u32 cp_hqd_persistent_state;
3577 u32 cp_hqd_pipe_priority;
3578 u32 cp_hqd_queue_priority;
3579 u32 cp_hqd_quantum;
3580 u32 cp_hqd_pq_base;
3581 u32 cp_hqd_pq_base_hi;
3582 u32 cp_hqd_pq_rptr;
3583 u32 cp_hqd_pq_rptr_report_addr;
3584 u32 cp_hqd_pq_rptr_report_addr_hi;
3585 u32 cp_hqd_pq_wptr_poll_addr;
3586 u32 cp_hqd_pq_wptr_poll_addr_hi;
3587 u32 cp_hqd_pq_doorbell_control;
3588 u32 cp_hqd_pq_wptr;
3589 u32 cp_hqd_pq_control;
3590 u32 cp_hqd_ib_base_addr;
3591 u32 cp_hqd_ib_base_addr_hi;
3592 u32 cp_hqd_ib_rptr;
3593 u32 cp_hqd_ib_control;
3594 u32 cp_hqd_iq_timer;
3595 u32 cp_hqd_iq_rptr;
3596 u32 cp_hqd_dequeue_request;
3597 u32 cp_hqd_dma_offload;
3598 u32 cp_hqd_sema_cmd;
3599 u32 cp_hqd_msg_type;
3600 u32 cp_hqd_atomic0_preop_lo;
3601 u32 cp_hqd_atomic0_preop_hi;
3602 u32 cp_hqd_atomic1_preop_lo;
3603 u32 cp_hqd_atomic1_preop_hi;
3604 u32 cp_hqd_hq_scheduler0;
3605 u32 cp_hqd_hq_scheduler1;
3606 u32 cp_mqd_control;
3607};
3608
3609struct bonaire_mqd
3610{
3611 u32 header;
3612 u32 dispatch_initiator;
3613 u32 dimensions[3];
3614 u32 start_idx[3];
3615 u32 num_threads[3];
3616 u32 pipeline_stat_enable;
3617 u32 perf_counter_enable;
3618 u32 pgm[2];
3619 u32 tba[2];
3620 u32 tma[2];
3621 u32 pgm_rsrc[2];
3622 u32 vmid;
3623 u32 resource_limits;
3624 u32 static_thread_mgmt01[2];
3625 u32 tmp_ring_size;
3626 u32 static_thread_mgmt23[2];
3627 u32 restart[3];
3628 u32 thread_trace_enable;
3629 u32 reserved1;
3630 u32 user_data[16];
3631 u32 vgtcs_invoke_count[2];
3632 struct hqd_registers queue_state;
3633 u32 dequeue_cntr;
3634 u32 interrupt_queue[64];
3635};
3636
841cf442
AD
3637/**
3638 * cik_cp_compute_resume - setup the compute queue registers
3639 *
3640 * @rdev: radeon_device pointer
3641 *
3642 * Program the compute queues and test them to make sure they
3643 * are working.
3644 * Returns 0 for success, error for failure.
3645 */
3646static int cik_cp_compute_resume(struct radeon_device *rdev)
3647{
963e81f9
AD
3648 int r, i, idx;
3649 u32 tmp;
3650 bool use_doorbell = true;
3651 u64 hqd_gpu_addr;
3652 u64 mqd_gpu_addr;
3653 u64 eop_gpu_addr;
3654 u64 wb_gpu_addr;
3655 u32 *buf;
3656 struct bonaire_mqd *mqd;
841cf442 3657
841cf442
AD
3658 r = cik_cp_compute_start(rdev);
3659 if (r)
3660 return r;
963e81f9
AD
3661
3662 /* fix up chicken bits */
3663 tmp = RREG32(CP_CPF_DEBUG);
3664 tmp |= (1 << 23);
3665 WREG32(CP_CPF_DEBUG, tmp);
3666
3667 /* init the pipes */
f61d5b46 3668 mutex_lock(&rdev->srbm_mutex);
963e81f9
AD
3669 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3670 int me = (i < 4) ? 1 : 2;
3671 int pipe = (i < 4) ? i : (i - 4);
3672
3673 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3674
3675 cik_srbm_select(rdev, me, pipe, 0, 0);
3676
3677 /* write the EOP addr */
3678 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3679 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3680
3681 /* set the VMID assigned */
3682 WREG32(CP_HPD_EOP_VMID, 0);
3683
3684 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3685 tmp = RREG32(CP_HPD_EOP_CONTROL);
3686 tmp &= ~EOP_SIZE_MASK;
3687 tmp |= drm_order(MEC_HPD_SIZE / 8);
3688 WREG32(CP_HPD_EOP_CONTROL, tmp);
3689 }
3690 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 3691 mutex_unlock(&rdev->srbm_mutex);
963e81f9
AD
3692
3693 /* init the queues. Just two for now. */
3694 for (i = 0; i < 2; i++) {
3695 if (i == 0)
3696 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3697 else
3698 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3699
3700 if (rdev->ring[idx].mqd_obj == NULL) {
3701 r = radeon_bo_create(rdev,
3702 sizeof(struct bonaire_mqd),
3703 PAGE_SIZE, true,
3704 RADEON_GEM_DOMAIN_GTT, NULL,
3705 &rdev->ring[idx].mqd_obj);
3706 if (r) {
3707 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3708 return r;
3709 }
3710 }
3711
3712 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3713 if (unlikely(r != 0)) {
3714 cik_cp_compute_fini(rdev);
3715 return r;
3716 }
3717 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3718 &mqd_gpu_addr);
3719 if (r) {
3720 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3721 cik_cp_compute_fini(rdev);
3722 return r;
3723 }
3724 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3725 if (r) {
3726 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3727 cik_cp_compute_fini(rdev);
3728 return r;
3729 }
3730
3731 /* doorbell offset */
3732 rdev->ring[idx].doorbell_offset =
3733 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3734
3735 /* init the mqd struct */
3736 memset(buf, 0, sizeof(struct bonaire_mqd));
3737
3738 mqd = (struct bonaire_mqd *)buf;
3739 mqd->header = 0xC0310800;
3740 mqd->static_thread_mgmt01[0] = 0xffffffff;
3741 mqd->static_thread_mgmt01[1] = 0xffffffff;
3742 mqd->static_thread_mgmt23[0] = 0xffffffff;
3743 mqd->static_thread_mgmt23[1] = 0xffffffff;
3744
f61d5b46 3745 mutex_lock(&rdev->srbm_mutex);
963e81f9
AD
3746 cik_srbm_select(rdev, rdev->ring[idx].me,
3747 rdev->ring[idx].pipe,
3748 rdev->ring[idx].queue, 0);
3749
3750 /* disable wptr polling */
3751 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3752 tmp &= ~WPTR_POLL_EN;
3753 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3754
3755 /* enable doorbell? */
3756 mqd->queue_state.cp_hqd_pq_doorbell_control =
3757 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3758 if (use_doorbell)
3759 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3760 else
3761 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3762 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3763 mqd->queue_state.cp_hqd_pq_doorbell_control);
3764
3765 /* disable the queue if it's active */
3766 mqd->queue_state.cp_hqd_dequeue_request = 0;
3767 mqd->queue_state.cp_hqd_pq_rptr = 0;
3768 mqd->queue_state.cp_hqd_pq_wptr= 0;
3769 if (RREG32(CP_HQD_ACTIVE) & 1) {
3770 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3771 for (i = 0; i < rdev->usec_timeout; i++) {
3772 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3773 break;
3774 udelay(1);
3775 }
3776 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3777 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3778 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3779 }
3780
3781 /* set the pointer to the MQD */
3782 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3783 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3784 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3785 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3786 /* set MQD vmid to 0 */
3787 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3788 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3789 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3790
3791 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3792 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3793 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3794 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3795 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3796 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3797
3798 /* set up the HQD, this is similar to CP_RB0_CNTL */
3799 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3800 mqd->queue_state.cp_hqd_pq_control &=
3801 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3802
3803 mqd->queue_state.cp_hqd_pq_control |=
3804 drm_order(rdev->ring[idx].ring_size / 8);
3805 mqd->queue_state.cp_hqd_pq_control |=
3806 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3807#ifdef __BIG_ENDIAN
3808 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3809#endif
3810 mqd->queue_state.cp_hqd_pq_control &=
3811 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3812 mqd->queue_state.cp_hqd_pq_control |=
3813 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3814 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3815
3816 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3817 if (i == 0)
3818 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3819 else
3820 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3821 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3822 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3823 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3824 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3825 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3826
3827 /* set the wb address wether it's enabled or not */
3828 if (i == 0)
3829 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3830 else
3831 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3832 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3833 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3834 upper_32_bits(wb_gpu_addr) & 0xffff;
3835 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3836 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3837 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3838 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3839
3840 /* enable the doorbell if requested */
3841 if (use_doorbell) {
3842 mqd->queue_state.cp_hqd_pq_doorbell_control =
3843 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3844 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3845 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3846 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3847 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3848 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3849 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3850
3851 } else {
3852 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3853 }
3854 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3855 mqd->queue_state.cp_hqd_pq_doorbell_control);
3856
3857 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3858 rdev->ring[idx].wptr = 0;
3859 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3860 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3861 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3862 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3863
3864 /* set the vmid for the queue */
3865 mqd->queue_state.cp_hqd_vmid = 0;
3866 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3867
3868 /* activate the queue */
3869 mqd->queue_state.cp_hqd_active = 1;
3870 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3871
3872 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 3873 mutex_unlock(&rdev->srbm_mutex);
963e81f9
AD
3874
3875 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3876 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3877
3878 rdev->ring[idx].ready = true;
3879 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3880 if (r)
3881 rdev->ring[idx].ready = false;
3882 }
3883
841cf442
AD
3884 return 0;
3885}
3886
841cf442
AD
3887static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3888{
3889 cik_cp_gfx_enable(rdev, enable);
3890 cik_cp_compute_enable(rdev, enable);
3891}
3892
841cf442
AD
3893static int cik_cp_load_microcode(struct radeon_device *rdev)
3894{
3895 int r;
3896
3897 r = cik_cp_gfx_load_microcode(rdev);
3898 if (r)
3899 return r;
3900 r = cik_cp_compute_load_microcode(rdev);
3901 if (r)
3902 return r;
3903
3904 return 0;
3905}
3906
841cf442
AD
3907static void cik_cp_fini(struct radeon_device *rdev)
3908{
3909 cik_cp_gfx_fini(rdev);
3910 cik_cp_compute_fini(rdev);
3911}
3912
841cf442
AD
3913static int cik_cp_resume(struct radeon_device *rdev)
3914{
3915 int r;
3916
3917 /* Reset all cp blocks */
3918 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3919 RREG32(GRBM_SOFT_RESET);
3920 mdelay(15);
3921 WREG32(GRBM_SOFT_RESET, 0);
3922 RREG32(GRBM_SOFT_RESET);
3923
3924 r = cik_cp_load_microcode(rdev);
3925 if (r)
3926 return r;
3927
3928 r = cik_cp_gfx_resume(rdev);
3929 if (r)
3930 return r;
3931 r = cik_cp_compute_resume(rdev);
3932 if (r)
3933 return r;
3934
3935 return 0;
3936}
3937
21a93e13
AD
3938/*
3939 * sDMA - System DMA
3940 * Starting with CIK, the GPU has new asynchronous
3941 * DMA engines. These engines are used for compute
3942 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3943 * and each one supports 1 ring buffer used for gfx
3944 * and 2 queues used for compute.
3945 *
3946 * The programming model is very similar to the CP
3947 * (ring buffer, IBs, etc.), but sDMA has it's own
3948 * packet format that is different from the PM4 format
3949 * used by the CP. sDMA supports copying data, writing
3950 * embedded data, solid fills, and a number of other
3951 * things. It also has support for tiling/detiling of
3952 * buffers.
3953 */
3954/**
3955 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3956 *
3957 * @rdev: radeon_device pointer
3958 * @ib: IB object to schedule
3959 *
3960 * Schedule an IB in the DMA ring (CIK).
3961 */
3962void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3963 struct radeon_ib *ib)
3964{
3965 struct radeon_ring *ring = &rdev->ring[ib->ring];
3966 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3967
3968 if (rdev->wb.enabled) {
3969 u32 next_rptr = ring->wptr + 5;
3970 while ((next_rptr & 7) != 4)
3971 next_rptr++;
3972 next_rptr += 4;
3973 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3974 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3975 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3976 radeon_ring_write(ring, 1); /* number of DWs to follow */
3977 radeon_ring_write(ring, next_rptr);
3978 }
3979
3980 /* IB packet must end on a 8 DW boundary */
3981 while ((ring->wptr & 7) != 4)
3982 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3983 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3984 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3985 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3986 radeon_ring_write(ring, ib->length_dw);
3987
3988}
3989
3990/**
3991 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3992 *
3993 * @rdev: radeon_device pointer
3994 * @fence: radeon fence object
3995 *
3996 * Add a DMA fence packet to the ring to write
3997 * the fence seq number and DMA trap packet to generate
3998 * an interrupt if needed (CIK).
3999 */
4000void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
4001 struct radeon_fence *fence)
4002{
4003 struct radeon_ring *ring = &rdev->ring[fence->ring];
4004 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4005 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4006 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4007 u32 ref_and_mask;
4008
4009 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
4010 ref_and_mask = SDMA0;
4011 else
4012 ref_and_mask = SDMA1;
4013
4014 /* write the fence */
4015 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
4016 radeon_ring_write(ring, addr & 0xffffffff);
4017 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4018 radeon_ring_write(ring, fence->seq);
4019 /* generate an interrupt */
4020 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
4021 /* flush HDP */
4022 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4023 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4024 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4025 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4026 radeon_ring_write(ring, ref_and_mask); /* MASK */
4027 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4028}
4029
4030/**
4031 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
4032 *
4033 * @rdev: radeon_device pointer
4034 * @ring: radeon_ring structure holding ring information
4035 * @semaphore: radeon semaphore object
4036 * @emit_wait: wait or signal semaphore
4037 *
4038 * Add a DMA semaphore packet to the ring wait on or signal
4039 * other rings (CIK).
4040 */
4041void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
4042 struct radeon_ring *ring,
4043 struct radeon_semaphore *semaphore,
4044 bool emit_wait)
4045{
4046 u64 addr = semaphore->gpu_addr;
4047 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
4048
4049 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
4050 radeon_ring_write(ring, addr & 0xfffffff8);
4051 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4052}
4053
4054/**
4055 * cik_sdma_gfx_stop - stop the gfx async dma engines
4056 *
4057 * @rdev: radeon_device pointer
4058 *
4059 * Stop the gfx async dma ring buffers (CIK).
4060 */
4061static void cik_sdma_gfx_stop(struct radeon_device *rdev)
4062{
4063 u32 rb_cntl, reg_offset;
4064 int i;
4065
4066 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4067
4068 for (i = 0; i < 2; i++) {
4069 if (i == 0)
4070 reg_offset = SDMA0_REGISTER_OFFSET;
4071 else
4072 reg_offset = SDMA1_REGISTER_OFFSET;
4073 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
4074 rb_cntl &= ~SDMA_RB_ENABLE;
4075 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4076 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
4077 }
4078}
4079
4080/**
4081 * cik_sdma_rlc_stop - stop the compute async dma engines
4082 *
4083 * @rdev: radeon_device pointer
4084 *
4085 * Stop the compute async dma queues (CIK).
4086 */
4087static void cik_sdma_rlc_stop(struct radeon_device *rdev)
4088{
4089 /* XXX todo */
4090}
4091
4092/**
4093 * cik_sdma_enable - stop the async dma engines
4094 *
4095 * @rdev: radeon_device pointer
4096 * @enable: enable/disable the DMA MEs.
4097 *
4098 * Halt or unhalt the async dma engines (CIK).
4099 */
4100static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
4101{
4102 u32 me_cntl, reg_offset;
4103 int i;
4104
4105 for (i = 0; i < 2; i++) {
4106 if (i == 0)
4107 reg_offset = SDMA0_REGISTER_OFFSET;
4108 else
4109 reg_offset = SDMA1_REGISTER_OFFSET;
4110 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
4111 if (enable)
4112 me_cntl &= ~SDMA_HALT;
4113 else
4114 me_cntl |= SDMA_HALT;
4115 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
4116 }
4117}
4118
4119/**
4120 * cik_sdma_gfx_resume - setup and start the async dma engines
4121 *
4122 * @rdev: radeon_device pointer
4123 *
4124 * Set up the gfx DMA ring buffers and enable them (CIK).
4125 * Returns 0 for success, error for failure.
4126 */
4127static int cik_sdma_gfx_resume(struct radeon_device *rdev)
4128{
4129 struct radeon_ring *ring;
4130 u32 rb_cntl, ib_cntl;
4131 u32 rb_bufsz;
4132 u32 reg_offset, wb_offset;
4133 int i, r;
4134
4135 for (i = 0; i < 2; i++) {
4136 if (i == 0) {
4137 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4138 reg_offset = SDMA0_REGISTER_OFFSET;
4139 wb_offset = R600_WB_DMA_RPTR_OFFSET;
4140 } else {
4141 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4142 reg_offset = SDMA1_REGISTER_OFFSET;
4143 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
4144 }
4145
4146 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
4147 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
4148
4149 /* Set ring buffer size in dwords */
4150 rb_bufsz = drm_order(ring->ring_size / 4);
4151 rb_cntl = rb_bufsz << 1;
4152#ifdef __BIG_ENDIAN
4153 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
4154#endif
4155 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4156
4157 /* Initialize the ring buffer's read and write pointers */
4158 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
4159 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
4160
4161 /* set the wb address whether it's enabled or not */
4162 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
4163 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
4164 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
4165 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
4166
4167 if (rdev->wb.enabled)
4168 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
4169
4170 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
4171 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
4172
4173 ring->wptr = 0;
4174 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
4175
4176 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
4177
4178 /* enable DMA RB */
4179 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
4180
4181 ib_cntl = SDMA_IB_ENABLE;
4182#ifdef __BIG_ENDIAN
4183 ib_cntl |= SDMA_IB_SWAP_ENABLE;
4184#endif
4185 /* enable DMA IBs */
4186 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
4187
4188 ring->ready = true;
4189
4190 r = radeon_ring_test(rdev, ring->idx, ring);
4191 if (r) {
4192 ring->ready = false;
4193 return r;
4194 }
4195 }
4196
4197 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4198
4199 return 0;
4200}
4201
4202/**
4203 * cik_sdma_rlc_resume - setup and start the async dma engines
4204 *
4205 * @rdev: radeon_device pointer
4206 *
4207 * Set up the compute DMA queues and enable them (CIK).
4208 * Returns 0 for success, error for failure.
4209 */
4210static int cik_sdma_rlc_resume(struct radeon_device *rdev)
4211{
4212 /* XXX todo */
4213 return 0;
4214}
4215
4216/**
4217 * cik_sdma_load_microcode - load the sDMA ME ucode
4218 *
4219 * @rdev: radeon_device pointer
4220 *
4221 * Loads the sDMA0/1 ucode.
4222 * Returns 0 for success, -EINVAL if the ucode is not available.
4223 */
4224static int cik_sdma_load_microcode(struct radeon_device *rdev)
4225{
4226 const __be32 *fw_data;
4227 int i;
4228
4229 if (!rdev->sdma_fw)
4230 return -EINVAL;
4231
4232 /* stop the gfx rings and rlc compute queues */
4233 cik_sdma_gfx_stop(rdev);
4234 cik_sdma_rlc_stop(rdev);
4235
4236 /* halt the MEs */
4237 cik_sdma_enable(rdev, false);
4238
4239 /* sdma0 */
4240 fw_data = (const __be32 *)rdev->sdma_fw->data;
4241 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4242 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4243 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4244 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4245
4246 /* sdma1 */
4247 fw_data = (const __be32 *)rdev->sdma_fw->data;
4248 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4249 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4250 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4251 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4252
4253 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4254 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4255 return 0;
4256}
4257
4258/**
4259 * cik_sdma_resume - setup and start the async dma engines
4260 *
4261 * @rdev: radeon_device pointer
4262 *
4263 * Set up the DMA engines and enable them (CIK).
4264 * Returns 0 for success, error for failure.
4265 */
4266static int cik_sdma_resume(struct radeon_device *rdev)
4267{
4268 int r;
4269
4270 /* Reset dma */
4271 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
4272 RREG32(SRBM_SOFT_RESET);
4273 udelay(50);
4274 WREG32(SRBM_SOFT_RESET, 0);
4275 RREG32(SRBM_SOFT_RESET);
4276
4277 r = cik_sdma_load_microcode(rdev);
4278 if (r)
4279 return r;
4280
4281 /* unhalt the MEs */
4282 cik_sdma_enable(rdev, true);
4283
4284 /* start the gfx rings and rlc compute queues */
4285 r = cik_sdma_gfx_resume(rdev);
4286 if (r)
4287 return r;
4288 r = cik_sdma_rlc_resume(rdev);
4289 if (r)
4290 return r;
4291
4292 return 0;
4293}
4294
4295/**
4296 * cik_sdma_fini - tear down the async dma engines
4297 *
4298 * @rdev: radeon_device pointer
4299 *
4300 * Stop the async dma engines and free the rings (CIK).
4301 */
4302static void cik_sdma_fini(struct radeon_device *rdev)
4303{
4304 /* stop the gfx rings and rlc compute queues */
4305 cik_sdma_gfx_stop(rdev);
4306 cik_sdma_rlc_stop(rdev);
4307 /* halt the MEs */
4308 cik_sdma_enable(rdev, false);
4309 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
4310 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
4311 /* XXX - compute dma queue tear down */
4312}
4313
4314/**
4315 * cik_copy_dma - copy pages using the DMA engine
4316 *
4317 * @rdev: radeon_device pointer
4318 * @src_offset: src GPU address
4319 * @dst_offset: dst GPU address
4320 * @num_gpu_pages: number of GPU pages to xfer
4321 * @fence: radeon fence object
4322 *
4323 * Copy GPU paging using the DMA engine (CIK).
4324 * Used by the radeon ttm implementation to move pages if
4325 * registered as the asic copy callback.
4326 */
4327int cik_copy_dma(struct radeon_device *rdev,
4328 uint64_t src_offset, uint64_t dst_offset,
4329 unsigned num_gpu_pages,
4330 struct radeon_fence **fence)
4331{
4332 struct radeon_semaphore *sem = NULL;
4333 int ring_index = rdev->asic->copy.dma_ring_index;
4334 struct radeon_ring *ring = &rdev->ring[ring_index];
4335 u32 size_in_bytes, cur_size_in_bytes;
4336 int i, num_loops;
4337 int r = 0;
4338
4339 r = radeon_semaphore_create(rdev, &sem);
4340 if (r) {
4341 DRM_ERROR("radeon: moving bo (%d).\n", r);
4342 return r;
4343 }
4344
4345 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4346 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4347 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
4348 if (r) {
4349 DRM_ERROR("radeon: moving bo (%d).\n", r);
4350 radeon_semaphore_free(rdev, &sem, NULL);
4351 return r;
4352 }
4353
4354 if (radeon_fence_need_sync(*fence, ring->idx)) {
4355 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4356 ring->idx);
4357 radeon_fence_note_sync(*fence, ring->idx);
4358 } else {
4359 radeon_semaphore_free(rdev, &sem, NULL);
4360 }
4361
4362 for (i = 0; i < num_loops; i++) {
4363 cur_size_in_bytes = size_in_bytes;
4364 if (cur_size_in_bytes > 0x1fffff)
4365 cur_size_in_bytes = 0x1fffff;
4366 size_in_bytes -= cur_size_in_bytes;
4367 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
4368 radeon_ring_write(ring, cur_size_in_bytes);
4369 radeon_ring_write(ring, 0); /* src/dst endian swap */
4370 radeon_ring_write(ring, src_offset & 0xffffffff);
4371 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
4372 radeon_ring_write(ring, dst_offset & 0xfffffffc);
4373 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
4374 src_offset += cur_size_in_bytes;
4375 dst_offset += cur_size_in_bytes;
4376 }
4377
4378 r = radeon_fence_emit(rdev, fence, ring->idx);
4379 if (r) {
4380 radeon_ring_unlock_undo(rdev, ring);
4381 return r;
4382 }
4383
4384 radeon_ring_unlock_commit(rdev, ring);
4385 radeon_semaphore_free(rdev, &sem, *fence);
4386
4387 return r;
4388}
4389
4390/**
4391 * cik_sdma_ring_test - simple async dma engine test
4392 *
4393 * @rdev: radeon_device pointer
4394 * @ring: radeon_ring structure holding ring information
4395 *
4396 * Test the DMA engine by writing using it to write an
4397 * value to memory. (CIK).
4398 * Returns 0 for success, error for failure.
4399 */
4400int cik_sdma_ring_test(struct radeon_device *rdev,
4401 struct radeon_ring *ring)
4402{
4403 unsigned i;
4404 int r;
4405 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4406 u32 tmp;
4407
4408 if (!ptr) {
4409 DRM_ERROR("invalid vram scratch pointer\n");
4410 return -EINVAL;
4411 }
4412
4413 tmp = 0xCAFEDEAD;
4414 writel(tmp, ptr);
4415
4416 r = radeon_ring_lock(rdev, ring, 4);
4417 if (r) {
4418 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
4419 return r;
4420 }
4421 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4422 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
4423 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
4424 radeon_ring_write(ring, 1); /* number of DWs to follow */
4425 radeon_ring_write(ring, 0xDEADBEEF);
4426 radeon_ring_unlock_commit(rdev, ring);
4427
4428 for (i = 0; i < rdev->usec_timeout; i++) {
4429 tmp = readl(ptr);
4430 if (tmp == 0xDEADBEEF)
4431 break;
4432 DRM_UDELAY(1);
4433 }
4434
4435 if (i < rdev->usec_timeout) {
4436 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
4437 } else {
4438 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
4439 ring->idx, tmp);
4440 r = -EINVAL;
4441 }
4442 return r;
4443}
4444
4445/**
4446 * cik_sdma_ib_test - test an IB on the DMA engine
4447 *
4448 * @rdev: radeon_device pointer
4449 * @ring: radeon_ring structure holding ring information
4450 *
4451 * Test a simple IB in the DMA ring (CIK).
4452 * Returns 0 on success, error on failure.
4453 */
4454int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4455{
4456 struct radeon_ib ib;
4457 unsigned i;
4458 int r;
4459 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4460 u32 tmp = 0;
4461
4462 if (!ptr) {
4463 DRM_ERROR("invalid vram scratch pointer\n");
4464 return -EINVAL;
4465 }
4466
4467 tmp = 0xCAFEDEAD;
4468 writel(tmp, ptr);
4469
4470 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4471 if (r) {
4472 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4473 return r;
4474 }
4475
4476 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4477 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
4478 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
4479 ib.ptr[3] = 1;
4480 ib.ptr[4] = 0xDEADBEEF;
4481 ib.length_dw = 5;
4482
4483 r = radeon_ib_schedule(rdev, &ib, NULL);
4484 if (r) {
4485 radeon_ib_free(rdev, &ib);
4486 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4487 return r;
4488 }
4489 r = radeon_fence_wait(ib.fence, false);
4490 if (r) {
4491 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4492 return r;
4493 }
4494 for (i = 0; i < rdev->usec_timeout; i++) {
4495 tmp = readl(ptr);
4496 if (tmp == 0xDEADBEEF)
4497 break;
4498 DRM_UDELAY(1);
4499 }
4500 if (i < rdev->usec_timeout) {
4501 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4502 } else {
4503 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
4504 r = -EINVAL;
4505 }
4506 radeon_ib_free(rdev, &ib);
4507 return r;
4508}
4509
6f2043ce 4510
cc066715 4511static void cik_print_gpu_status_regs(struct radeon_device *rdev)
6f2043ce 4512{
6f2043ce
AD
4513 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4514 RREG32(GRBM_STATUS));
4515 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4516 RREG32(GRBM_STATUS2));
4517 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4518 RREG32(GRBM_STATUS_SE0));
4519 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4520 RREG32(GRBM_STATUS_SE1));
4521 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4522 RREG32(GRBM_STATUS_SE2));
4523 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4524 RREG32(GRBM_STATUS_SE3));
4525 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4526 RREG32(SRBM_STATUS));
4527 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4528 RREG32(SRBM_STATUS2));
cc066715
AD
4529 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4530 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4531 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4532 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
963e81f9
AD
4533 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4534 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4535 RREG32(CP_STALLED_STAT1));
4536 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4537 RREG32(CP_STALLED_STAT2));
4538 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4539 RREG32(CP_STALLED_STAT3));
4540 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4541 RREG32(CP_CPF_BUSY_STAT));
4542 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4543 RREG32(CP_CPF_STALLED_STAT1));
4544 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4545 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4546 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4547 RREG32(CP_CPC_STALLED_STAT1));
4548 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
cc066715 4549}
6f2043ce 4550
cc066715
AD
4551/**
4552 * cik_gpu_check_soft_reset - check which blocks are busy
4553 *
4554 * @rdev: radeon_device pointer
4555 *
4556 * Check which blocks are busy and return the relevant reset
4557 * mask to be used by cik_gpu_soft_reset().
4558 * Returns a mask of the blocks to be reset.
4559 */
4560static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4561{
4562 u32 reset_mask = 0;
4563 u32 tmp;
6f2043ce 4564
cc066715
AD
4565 /* GRBM_STATUS */
4566 tmp = RREG32(GRBM_STATUS);
4567 if (tmp & (PA_BUSY | SC_BUSY |
4568 BCI_BUSY | SX_BUSY |
4569 TA_BUSY | VGT_BUSY |
4570 DB_BUSY | CB_BUSY |
4571 GDS_BUSY | SPI_BUSY |
4572 IA_BUSY | IA_BUSY_NO_DMA))
4573 reset_mask |= RADEON_RESET_GFX;
4574
4575 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4576 reset_mask |= RADEON_RESET_CP;
4577
4578 /* GRBM_STATUS2 */
4579 tmp = RREG32(GRBM_STATUS2);
4580 if (tmp & RLC_BUSY)
4581 reset_mask |= RADEON_RESET_RLC;
4582
4583 /* SDMA0_STATUS_REG */
4584 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4585 if (!(tmp & SDMA_IDLE))
4586 reset_mask |= RADEON_RESET_DMA;
4587
4588 /* SDMA1_STATUS_REG */
4589 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4590 if (!(tmp & SDMA_IDLE))
4591 reset_mask |= RADEON_RESET_DMA1;
4592
4593 /* SRBM_STATUS2 */
4594 tmp = RREG32(SRBM_STATUS2);
4595 if (tmp & SDMA_BUSY)
4596 reset_mask |= RADEON_RESET_DMA;
4597
4598 if (tmp & SDMA1_BUSY)
4599 reset_mask |= RADEON_RESET_DMA1;
4600
4601 /* SRBM_STATUS */
4602 tmp = RREG32(SRBM_STATUS);
4603
4604 if (tmp & IH_BUSY)
4605 reset_mask |= RADEON_RESET_IH;
4606
4607 if (tmp & SEM_BUSY)
4608 reset_mask |= RADEON_RESET_SEM;
4609
4610 if (tmp & GRBM_RQ_PENDING)
4611 reset_mask |= RADEON_RESET_GRBM;
4612
4613 if (tmp & VMC_BUSY)
4614 reset_mask |= RADEON_RESET_VMC;
4615
4616 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4617 MCC_BUSY | MCD_BUSY))
4618 reset_mask |= RADEON_RESET_MC;
4619
4620 if (evergreen_is_display_hung(rdev))
4621 reset_mask |= RADEON_RESET_DISPLAY;
4622
4623 /* Skip MC reset as it's mostly likely not hung, just busy */
4624 if (reset_mask & RADEON_RESET_MC) {
4625 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4626 reset_mask &= ~RADEON_RESET_MC;
4627 }
4628
4629 return reset_mask;
6f2043ce
AD
4630}
4631
4632/**
cc066715 4633 * cik_gpu_soft_reset - soft reset GPU
6f2043ce
AD
4634 *
4635 * @rdev: radeon_device pointer
cc066715 4636 * @reset_mask: mask of which blocks to reset
6f2043ce 4637 *
cc066715 4638 * Soft reset the blocks specified in @reset_mask.
6f2043ce 4639 */
cc066715 4640static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
6f2043ce
AD
4641{
4642 struct evergreen_mc_save save;
cc066715
AD
4643 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4644 u32 tmp;
4645
4646 if (reset_mask == 0)
4647 return;
4648
4649 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4650
4651 cik_print_gpu_status_regs(rdev);
4652 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4653 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4654 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4655 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4656
4657 /* stop the rlc */
4658 cik_rlc_stop(rdev);
4659
4660 /* Disable GFX parsing/prefetching */
4661 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4662
4663 /* Disable MEC parsing/prefetching */
4664 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4665
4666 if (reset_mask & RADEON_RESET_DMA) {
4667 /* sdma0 */
4668 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4669 tmp |= SDMA_HALT;
4670 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4671 }
4672 if (reset_mask & RADEON_RESET_DMA1) {
4673 /* sdma1 */
4674 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4675 tmp |= SDMA_HALT;
4676 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4677 }
6f2043ce 4678
6f2043ce 4679 evergreen_mc_stop(rdev, &save);
cc066715 4680 if (evergreen_mc_wait_for_idle(rdev)) {
6f2043ce
AD
4681 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4682 }
6f2043ce 4683
cc066715
AD
4684 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4685 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4686
4687 if (reset_mask & RADEON_RESET_CP) {
4688 grbm_soft_reset |= SOFT_RESET_CP;
4689
4690 srbm_soft_reset |= SOFT_RESET_GRBM;
4691 }
4692
4693 if (reset_mask & RADEON_RESET_DMA)
4694 srbm_soft_reset |= SOFT_RESET_SDMA;
4695
4696 if (reset_mask & RADEON_RESET_DMA1)
4697 srbm_soft_reset |= SOFT_RESET_SDMA1;
4698
4699 if (reset_mask & RADEON_RESET_DISPLAY)
4700 srbm_soft_reset |= SOFT_RESET_DC;
4701
4702 if (reset_mask & RADEON_RESET_RLC)
4703 grbm_soft_reset |= SOFT_RESET_RLC;
4704
4705 if (reset_mask & RADEON_RESET_SEM)
4706 srbm_soft_reset |= SOFT_RESET_SEM;
4707
4708 if (reset_mask & RADEON_RESET_IH)
4709 srbm_soft_reset |= SOFT_RESET_IH;
4710
4711 if (reset_mask & RADEON_RESET_GRBM)
4712 srbm_soft_reset |= SOFT_RESET_GRBM;
4713
4714 if (reset_mask & RADEON_RESET_VMC)
4715 srbm_soft_reset |= SOFT_RESET_VMC;
4716
4717 if (!(rdev->flags & RADEON_IS_IGP)) {
4718 if (reset_mask & RADEON_RESET_MC)
4719 srbm_soft_reset |= SOFT_RESET_MC;
4720 }
4721
4722 if (grbm_soft_reset) {
4723 tmp = RREG32(GRBM_SOFT_RESET);
4724 tmp |= grbm_soft_reset;
4725 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4726 WREG32(GRBM_SOFT_RESET, tmp);
4727 tmp = RREG32(GRBM_SOFT_RESET);
4728
4729 udelay(50);
4730
4731 tmp &= ~grbm_soft_reset;
4732 WREG32(GRBM_SOFT_RESET, tmp);
4733 tmp = RREG32(GRBM_SOFT_RESET);
4734 }
4735
4736 if (srbm_soft_reset) {
4737 tmp = RREG32(SRBM_SOFT_RESET);
4738 tmp |= srbm_soft_reset;
4739 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4740 WREG32(SRBM_SOFT_RESET, tmp);
4741 tmp = RREG32(SRBM_SOFT_RESET);
4742
4743 udelay(50);
4744
4745 tmp &= ~srbm_soft_reset;
4746 WREG32(SRBM_SOFT_RESET, tmp);
4747 tmp = RREG32(SRBM_SOFT_RESET);
4748 }
6f2043ce 4749
6f2043ce
AD
4750 /* Wait a little for things to settle down */
4751 udelay(50);
cc066715 4752
6f2043ce 4753 evergreen_mc_resume(rdev, &save);
cc066715
AD
4754 udelay(50);
4755
4756 cik_print_gpu_status_regs(rdev);
6f2043ce
AD
4757}
4758
4759/**
cc066715 4760 * cik_asic_reset - soft reset GPU
6f2043ce
AD
4761 *
4762 * @rdev: radeon_device pointer
4763 *
cc066715
AD
4764 * Look up which blocks are hung and attempt
4765 * to reset them.
6f2043ce
AD
4766 * Returns 0 for success.
4767 */
4768int cik_asic_reset(struct radeon_device *rdev)
4769{
cc066715 4770 u32 reset_mask;
6f2043ce 4771
cc066715
AD
4772 reset_mask = cik_gpu_check_soft_reset(rdev);
4773
4774 if (reset_mask)
4775 r600_set_bios_scratch_engine_hung(rdev, true);
4776
4777 cik_gpu_soft_reset(rdev, reset_mask);
6f2043ce 4778
cc066715
AD
4779 reset_mask = cik_gpu_check_soft_reset(rdev);
4780
4781 if (!reset_mask)
4782 r600_set_bios_scratch_engine_hung(rdev, false);
4783
4784 return 0;
4785}
4786
4787/**
4788 * cik_gfx_is_lockup - check if the 3D engine is locked up
4789 *
4790 * @rdev: radeon_device pointer
4791 * @ring: radeon_ring structure holding ring information
4792 *
4793 * Check if the 3D engine is locked up (CIK).
4794 * Returns true if the engine is locked, false if not.
4795 */
4796bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4797{
4798 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4799
4800 if (!(reset_mask & (RADEON_RESET_GFX |
4801 RADEON_RESET_COMPUTE |
4802 RADEON_RESET_CP))) {
4803 radeon_ring_lockup_update(ring);
4804 return false;
4805 }
4806 /* force CP activities */
4807 radeon_ring_force_activity(rdev, ring);
4808 return radeon_ring_test_lockup(rdev, ring);
6f2043ce 4809}
1c49165d 4810
21a93e13
AD
4811/**
4812 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4813 *
4814 * @rdev: radeon_device pointer
4815 * @ring: radeon_ring structure holding ring information
4816 *
4817 * Check if the async DMA engine is locked up (CIK).
4818 * Returns true if the engine appears to be locked up, false if not.
4819 */
4820bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4821{
cc066715
AD
4822 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4823 u32 mask;
21a93e13
AD
4824
4825 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
cc066715 4826 mask = RADEON_RESET_DMA;
21a93e13 4827 else
cc066715
AD
4828 mask = RADEON_RESET_DMA1;
4829
4830 if (!(reset_mask & mask)) {
21a93e13
AD
4831 radeon_ring_lockup_update(ring);
4832 return false;
4833 }
4834 /* force ring activities */
4835 radeon_ring_force_activity(rdev, ring);
4836 return radeon_ring_test_lockup(rdev, ring);
4837}
4838
1c49165d
AD
4839/* MC */
4840/**
4841 * cik_mc_program - program the GPU memory controller
4842 *
4843 * @rdev: radeon_device pointer
4844 *
4845 * Set the location of vram, gart, and AGP in the GPU's
4846 * physical address space (CIK).
4847 */
4848static void cik_mc_program(struct radeon_device *rdev)
4849{
4850 struct evergreen_mc_save save;
4851 u32 tmp;
4852 int i, j;
4853
4854 /* Initialize HDP */
4855 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4856 WREG32((0x2c14 + j), 0x00000000);
4857 WREG32((0x2c18 + j), 0x00000000);
4858 WREG32((0x2c1c + j), 0x00000000);
4859 WREG32((0x2c20 + j), 0x00000000);
4860 WREG32((0x2c24 + j), 0x00000000);
4861 }
4862 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4863
4864 evergreen_mc_stop(rdev, &save);
4865 if (radeon_mc_wait_for_idle(rdev)) {
4866 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4867 }
4868 /* Lockout access through VGA aperture*/
4869 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4870 /* Update configuration */
4871 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4872 rdev->mc.vram_start >> 12);
4873 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4874 rdev->mc.vram_end >> 12);
4875 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4876 rdev->vram_scratch.gpu_addr >> 12);
4877 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4878 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4879 WREG32(MC_VM_FB_LOCATION, tmp);
4880 /* XXX double check these! */
4881 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4882 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4883 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4884 WREG32(MC_VM_AGP_BASE, 0);
4885 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4886 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4887 if (radeon_mc_wait_for_idle(rdev)) {
4888 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4889 }
4890 evergreen_mc_resume(rdev, &save);
4891 /* we need to own VRAM, so turn off the VGA renderer here
4892 * to stop it overwriting our objects */
4893 rv515_vga_render_disable(rdev);
4894}
4895
4896/**
4897 * cik_mc_init - initialize the memory controller driver params
4898 *
4899 * @rdev: radeon_device pointer
4900 *
4901 * Look up the amount of vram, vram width, and decide how to place
4902 * vram and gart within the GPU's physical address space (CIK).
4903 * Returns 0 for success.
4904 */
4905static int cik_mc_init(struct radeon_device *rdev)
4906{
4907 u32 tmp;
4908 int chansize, numchan;
4909
4910 /* Get VRAM informations */
4911 rdev->mc.vram_is_ddr = true;
4912 tmp = RREG32(MC_ARB_RAMCFG);
4913 if (tmp & CHANSIZE_MASK) {
4914 chansize = 64;
4915 } else {
4916 chansize = 32;
4917 }
4918 tmp = RREG32(MC_SHARED_CHMAP);
4919 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4920 case 0:
4921 default:
4922 numchan = 1;
4923 break;
4924 case 1:
4925 numchan = 2;
4926 break;
4927 case 2:
4928 numchan = 4;
4929 break;
4930 case 3:
4931 numchan = 8;
4932 break;
4933 case 4:
4934 numchan = 3;
4935 break;
4936 case 5:
4937 numchan = 6;
4938 break;
4939 case 6:
4940 numchan = 10;
4941 break;
4942 case 7:
4943 numchan = 12;
4944 break;
4945 case 8:
4946 numchan = 16;
4947 break;
4948 }
4949 rdev->mc.vram_width = numchan * chansize;
4950 /* Could aper size report 0 ? */
4951 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4952 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4953 /* size in MB on si */
4954 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4955 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4956 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4957 si_vram_gtt_location(rdev, &rdev->mc);
4958 radeon_update_bandwidth_info(rdev);
4959
4960 return 0;
4961}
4962
4963/*
4964 * GART
4965 * VMID 0 is the physical GPU addresses as used by the kernel.
4966 * VMIDs 1-15 are used for userspace clients and are handled
4967 * by the radeon vm/hsa code.
4968 */
4969/**
4970 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4971 *
4972 * @rdev: radeon_device pointer
4973 *
4974 * Flush the TLB for the VMID 0 page table (CIK).
4975 */
4976void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4977{
4978 /* flush hdp cache */
4979 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4980
4981 /* bits 0-15 are the VM contexts0-15 */
4982 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4983}
4984
4985/**
4986 * cik_pcie_gart_enable - gart enable
4987 *
4988 * @rdev: radeon_device pointer
4989 *
4990 * This sets up the TLBs, programs the page tables for VMID0,
4991 * sets up the hw for VMIDs 1-15 which are allocated on
4992 * demand, and sets up the global locations for the LDS, GDS,
4993 * and GPUVM for FSA64 clients (CIK).
4994 * Returns 0 for success, errors for failure.
4995 */
4996static int cik_pcie_gart_enable(struct radeon_device *rdev)
4997{
4998 int r, i;
4999
5000 if (rdev->gart.robj == NULL) {
5001 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5002 return -EINVAL;
5003 }
5004 r = radeon_gart_table_vram_pin(rdev);
5005 if (r)
5006 return r;
5007 radeon_gart_restore(rdev);
5008 /* Setup TLB control */
5009 WREG32(MC_VM_MX_L1_TLB_CNTL,
5010 (0xA << 7) |
5011 ENABLE_L1_TLB |
5012 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5013 ENABLE_ADVANCED_DRIVER_MODEL |
5014 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5015 /* Setup L2 cache */
5016 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5017 ENABLE_L2_FRAGMENT_PROCESSING |
5018 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5019 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5020 EFFECTIVE_L2_QUEUE_SIZE(7) |
5021 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5022 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5023 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5024 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5025 /* setup context0 */
5026 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5027 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5028 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5029 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5030 (u32)(rdev->dummy_page.addr >> 12));
5031 WREG32(VM_CONTEXT0_CNTL2, 0);
5032 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5033 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5034
5035 WREG32(0x15D4, 0);
5036 WREG32(0x15D8, 0);
5037 WREG32(0x15DC, 0);
5038
5039 /* empty context1-15 */
5040 /* FIXME start with 4G, once using 2 level pt switch to full
5041 * vm size space
5042 */
5043 /* set vm size, must be a multiple of 4 */
5044 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5045 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5046 for (i = 1; i < 16; i++) {
5047 if (i < 8)
5048 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5049 rdev->gart.table_addr >> 12);
5050 else
5051 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5052 rdev->gart.table_addr >> 12);
5053 }
5054
5055 /* enable context1-15 */
5056 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5057 (u32)(rdev->dummy_page.addr >> 12));
a00024b0 5058 WREG32(VM_CONTEXT1_CNTL2, 4);
1c49165d 5059 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
a00024b0
AD
5060 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5061 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5062 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5063 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5064 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5065 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5066 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5067 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5068 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5069 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5070 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5071 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
1c49165d
AD
5072
5073 /* TC cache setup ??? */
5074 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5075 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5076 WREG32(TC_CFG_L1_STORE_POLICY, 0);
5077
5078 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5079 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5080 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5081 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5082 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5083
5084 WREG32(TC_CFG_L1_VOLATILE, 0);
5085 WREG32(TC_CFG_L2_VOLATILE, 0);
5086
5087 if (rdev->family == CHIP_KAVERI) {
5088 u32 tmp = RREG32(CHUB_CONTROL);
5089 tmp &= ~BYPASS_VM;
5090 WREG32(CHUB_CONTROL, tmp);
5091 }
5092
5093 /* XXX SH_MEM regs */
5094 /* where to put LDS, scratch, GPUVM in FSA64 space */
f61d5b46 5095 mutex_lock(&rdev->srbm_mutex);
1c49165d 5096 for (i = 0; i < 16; i++) {
b556b12e 5097 cik_srbm_select(rdev, 0, 0, 0, i);
21a93e13 5098 /* CP and shaders */
1c49165d
AD
5099 WREG32(SH_MEM_CONFIG, 0);
5100 WREG32(SH_MEM_APE1_BASE, 1);
5101 WREG32(SH_MEM_APE1_LIMIT, 0);
5102 WREG32(SH_MEM_BASES, 0);
21a93e13
AD
5103 /* SDMA GFX */
5104 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5105 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5106 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5107 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5108 /* XXX SDMA RLC - todo */
1c49165d 5109 }
b556b12e 5110 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 5111 mutex_unlock(&rdev->srbm_mutex);
1c49165d
AD
5112
5113 cik_pcie_gart_tlb_flush(rdev);
5114 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5115 (unsigned)(rdev->mc.gtt_size >> 20),
5116 (unsigned long long)rdev->gart.table_addr);
5117 rdev->gart.ready = true;
5118 return 0;
5119}
5120
5121/**
5122 * cik_pcie_gart_disable - gart disable
5123 *
5124 * @rdev: radeon_device pointer
5125 *
5126 * This disables all VM page table (CIK).
5127 */
5128static void cik_pcie_gart_disable(struct radeon_device *rdev)
5129{
5130 /* Disable all tables */
5131 WREG32(VM_CONTEXT0_CNTL, 0);
5132 WREG32(VM_CONTEXT1_CNTL, 0);
5133 /* Setup TLB control */
5134 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5135 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5136 /* Setup L2 cache */
5137 WREG32(VM_L2_CNTL,
5138 ENABLE_L2_FRAGMENT_PROCESSING |
5139 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5140 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5141 EFFECTIVE_L2_QUEUE_SIZE(7) |
5142 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5143 WREG32(VM_L2_CNTL2, 0);
5144 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5145 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5146 radeon_gart_table_vram_unpin(rdev);
5147}
5148
5149/**
5150 * cik_pcie_gart_fini - vm fini callback
5151 *
5152 * @rdev: radeon_device pointer
5153 *
5154 * Tears down the driver GART/VM setup (CIK).
5155 */
5156static void cik_pcie_gart_fini(struct radeon_device *rdev)
5157{
5158 cik_pcie_gart_disable(rdev);
5159 radeon_gart_table_vram_free(rdev);
5160 radeon_gart_fini(rdev);
5161}
5162
5163/* vm parser */
5164/**
5165 * cik_ib_parse - vm ib_parse callback
5166 *
5167 * @rdev: radeon_device pointer
5168 * @ib: indirect buffer pointer
5169 *
5170 * CIK uses hw IB checking so this is a nop (CIK).
5171 */
5172int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5173{
5174 return 0;
5175}
5176
5177/*
5178 * vm
5179 * VMID 0 is the physical GPU addresses as used by the kernel.
5180 * VMIDs 1-15 are used for userspace clients and are handled
5181 * by the radeon vm/hsa code.
5182 */
5183/**
5184 * cik_vm_init - cik vm init callback
5185 *
5186 * @rdev: radeon_device pointer
5187 *
5188 * Inits cik specific vm parameters (number of VMs, base of vram for
5189 * VMIDs 1-15) (CIK).
5190 * Returns 0 for success.
5191 */
5192int cik_vm_init(struct radeon_device *rdev)
5193{
5194 /* number of VMs */
5195 rdev->vm_manager.nvm = 16;
5196 /* base offset of vram pages */
5197 if (rdev->flags & RADEON_IS_IGP) {
5198 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5199 tmp <<= 22;
5200 rdev->vm_manager.vram_base_offset = tmp;
5201 } else
5202 rdev->vm_manager.vram_base_offset = 0;
5203
5204 return 0;
5205}
5206
5207/**
5208 * cik_vm_fini - cik vm fini callback
5209 *
5210 * @rdev: radeon_device pointer
5211 *
5212 * Tear down any asic specific VM setup (CIK).
5213 */
5214void cik_vm_fini(struct radeon_device *rdev)
5215{
5216}
5217
3ec7d11b
AD
5218/**
5219 * cik_vm_decode_fault - print human readable fault info
5220 *
5221 * @rdev: radeon_device pointer
5222 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5223 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5224 *
5225 * Print human readable fault information (CIK).
5226 */
5227static void cik_vm_decode_fault(struct radeon_device *rdev,
5228 u32 status, u32 addr, u32 mc_client)
5229{
5230 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5231 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5232 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5233 char *block = (char *)&mc_client;
5234
5235 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5236 protections, vmid, addr,
5237 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5238 block, mc_id);
5239}
5240
f96ab484
AD
5241/**
5242 * cik_vm_flush - cik vm flush using the CP
5243 *
5244 * @rdev: radeon_device pointer
5245 *
5246 * Update the page table base and flush the VM TLB
5247 * using the CP (CIK).
5248 */
5249void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5250{
5251 struct radeon_ring *ring = &rdev->ring[ridx];
5252
5253 if (vm == NULL)
5254 return;
5255
5256 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5257 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5258 WRITE_DATA_DST_SEL(0)));
5259 if (vm->id < 8) {
5260 radeon_ring_write(ring,
5261 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5262 } else {
5263 radeon_ring_write(ring,
5264 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5265 }
5266 radeon_ring_write(ring, 0);
5267 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5268
5269 /* update SH_MEM_* regs */
5270 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5271 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5272 WRITE_DATA_DST_SEL(0)));
5273 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5274 radeon_ring_write(ring, 0);
5275 radeon_ring_write(ring, VMID(vm->id));
5276
5277 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5278 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5279 WRITE_DATA_DST_SEL(0)));
5280 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5281 radeon_ring_write(ring, 0);
5282
5283 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5284 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5285 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5286 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5287
5288 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5289 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5290 WRITE_DATA_DST_SEL(0)));
5291 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5292 radeon_ring_write(ring, 0);
5293 radeon_ring_write(ring, VMID(0));
5294
5295 /* HDP flush */
5296 /* We should be using the WAIT_REG_MEM packet here like in
5297 * cik_fence_ring_emit(), but it causes the CP to hang in this
5298 * context...
5299 */
5300 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5301 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5302 WRITE_DATA_DST_SEL(0)));
5303 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5304 radeon_ring_write(ring, 0);
5305 radeon_ring_write(ring, 0);
5306
5307 /* bits 0-15 are the VM contexts0-15 */
5308 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5309 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5310 WRITE_DATA_DST_SEL(0)));
5311 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5312 radeon_ring_write(ring, 0);
5313 radeon_ring_write(ring, 1 << vm->id);
5314
b07fdd38
AD
5315 /* compute doesn't have PFP */
5316 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5317 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5318 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5319 radeon_ring_write(ring, 0x0);
5320 }
f96ab484
AD
5321}
5322
d0e092d9
AD
5323/**
5324 * cik_vm_set_page - update the page tables using sDMA
5325 *
5326 * @rdev: radeon_device pointer
5327 * @ib: indirect buffer to fill with commands
5328 * @pe: addr of the page entry
5329 * @addr: dst addr to write into pe
5330 * @count: number of page entries to update
5331 * @incr: increase next addr by incr bytes
5332 * @flags: access flags
5333 *
5334 * Update the page tables using CP or sDMA (CIK).
5335 */
5336void cik_vm_set_page(struct radeon_device *rdev,
5337 struct radeon_ib *ib,
5338 uint64_t pe,
5339 uint64_t addr, unsigned count,
5340 uint32_t incr, uint32_t flags)
5341{
5342 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
5343 uint64_t value;
5344 unsigned ndw;
5345
5346 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
5347 /* CP */
5348 while (count) {
5349 ndw = 2 + count * 2;
5350 if (ndw > 0x3FFE)
5351 ndw = 0x3FFE;
5352
5353 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
5354 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
5355 WRITE_DATA_DST_SEL(1));
5356 ib->ptr[ib->length_dw++] = pe;
5357 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5358 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
5359 if (flags & RADEON_VM_PAGE_SYSTEM) {
5360 value = radeon_vm_map_gart(rdev, addr);
5361 value &= 0xFFFFFFFFFFFFF000ULL;
5362 } else if (flags & RADEON_VM_PAGE_VALID) {
5363 value = addr;
5364 } else {
5365 value = 0;
5366 }
5367 addr += incr;
5368 value |= r600_flags;
5369 ib->ptr[ib->length_dw++] = value;
5370 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5371 }
5372 }
5373 } else {
5374 /* DMA */
5375 if (flags & RADEON_VM_PAGE_SYSTEM) {
5376 while (count) {
5377 ndw = count * 2;
5378 if (ndw > 0xFFFFE)
5379 ndw = 0xFFFFE;
5380
5381 /* for non-physically contiguous pages (system) */
5382 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
5383 ib->ptr[ib->length_dw++] = pe;
5384 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5385 ib->ptr[ib->length_dw++] = ndw;
5386 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
5387 if (flags & RADEON_VM_PAGE_SYSTEM) {
5388 value = radeon_vm_map_gart(rdev, addr);
5389 value &= 0xFFFFFFFFFFFFF000ULL;
5390 } else if (flags & RADEON_VM_PAGE_VALID) {
5391 value = addr;
5392 } else {
5393 value = 0;
5394 }
5395 addr += incr;
5396 value |= r600_flags;
5397 ib->ptr[ib->length_dw++] = value;
5398 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5399 }
5400 }
5401 } else {
5402 while (count) {
5403 ndw = count;
5404 if (ndw > 0x7FFFF)
5405 ndw = 0x7FFFF;
5406
5407 if (flags & RADEON_VM_PAGE_VALID)
5408 value = addr;
5409 else
5410 value = 0;
5411 /* for physically contiguous pages (vram) */
5412 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
5413 ib->ptr[ib->length_dw++] = pe; /* dst addr */
5414 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5415 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
5416 ib->ptr[ib->length_dw++] = 0;
5417 ib->ptr[ib->length_dw++] = value; /* value */
5418 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5419 ib->ptr[ib->length_dw++] = incr; /* increment size */
5420 ib->ptr[ib->length_dw++] = 0;
5421 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
5422 pe += ndw * 8;
5423 addr += ndw * incr;
5424 count -= ndw;
5425 }
5426 }
5427 while (ib->length_dw & 0x7)
5428 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
5429 }
5430}
5431
605de6b9
AD
5432/**
5433 * cik_dma_vm_flush - cik vm flush using sDMA
5434 *
5435 * @rdev: radeon_device pointer
5436 *
5437 * Update the page table base and flush the VM TLB
5438 * using sDMA (CIK).
5439 */
5440void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5441{
5442 struct radeon_ring *ring = &rdev->ring[ridx];
5443 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
5444 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
5445 u32 ref_and_mask;
5446
5447 if (vm == NULL)
5448 return;
5449
5450 if (ridx == R600_RING_TYPE_DMA_INDEX)
5451 ref_and_mask = SDMA0;
5452 else
5453 ref_and_mask = SDMA1;
5454
5455 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5456 if (vm->id < 8) {
5457 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5458 } else {
5459 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5460 }
5461 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5462
5463 /* update SH_MEM_* regs */
5464 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5465 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5466 radeon_ring_write(ring, VMID(vm->id));
5467
5468 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5469 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5470 radeon_ring_write(ring, 0);
5471
5472 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5473 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
5474 radeon_ring_write(ring, 0);
5475
5476 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5477 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
5478 radeon_ring_write(ring, 1);
5479
5480 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5481 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
5482 radeon_ring_write(ring, 0);
5483
5484 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5485 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5486 radeon_ring_write(ring, VMID(0));
5487
5488 /* flush HDP */
5489 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
5490 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
5491 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
5492 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
5493 radeon_ring_write(ring, ref_and_mask); /* MASK */
5494 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
5495
5496 /* flush TLB */
5497 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5498 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5499 radeon_ring_write(ring, 1 << vm->id);
5500}
5501
f6796cae
AD
5502/*
5503 * RLC
5504 * The RLC is a multi-purpose microengine that handles a
5505 * variety of functions, the most important of which is
5506 * the interrupt controller.
5507 */
866d83de
AD
5508static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5509 bool enable)
f6796cae 5510{
866d83de 5511 u32 tmp = RREG32(CP_INT_CNTL_RING0);
f6796cae 5512
866d83de
AD
5513 if (enable)
5514 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5515 else
5516 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
f6796cae 5517 WREG32(CP_INT_CNTL_RING0, tmp);
866d83de 5518}
f6796cae 5519
866d83de
AD
5520static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5521{
5522 u32 tmp;
f6796cae 5523
866d83de
AD
5524 tmp = RREG32(RLC_LB_CNTL);
5525 if (enable)
5526 tmp |= LOAD_BALANCE_ENABLE;
5527 else
5528 tmp &= ~LOAD_BALANCE_ENABLE;
5529 WREG32(RLC_LB_CNTL, tmp);
5530}
f6796cae 5531
866d83de
AD
5532static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5533{
5534 u32 i, j, k;
5535 u32 mask;
f6796cae
AD
5536
5537 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5538 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5539 cik_select_se_sh(rdev, i, j);
5540 for (k = 0; k < rdev->usec_timeout; k++) {
5541 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5542 break;
5543 udelay(1);
5544 }
5545 }
5546 }
5547 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5548
5549 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5550 for (k = 0; k < rdev->usec_timeout; k++) {
5551 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5552 break;
5553 udelay(1);
5554 }
5555}
5556
22c775ce
AD
5557static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5558{
5559 u32 tmp;
5560
5561 tmp = RREG32(RLC_CNTL);
5562 if (tmp != rlc)
5563 WREG32(RLC_CNTL, rlc);
5564}
5565
5566static u32 cik_halt_rlc(struct radeon_device *rdev)
5567{
5568 u32 data, orig;
5569
5570 orig = data = RREG32(RLC_CNTL);
5571
5572 if (data & RLC_ENABLE) {
5573 u32 i;
5574
5575 data &= ~RLC_ENABLE;
5576 WREG32(RLC_CNTL, data);
5577
5578 for (i = 0; i < rdev->usec_timeout; i++) {
5579 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5580 break;
5581 udelay(1);
5582 }
5583
5584 cik_wait_for_rlc_serdes(rdev);
5585 }
5586
5587 return orig;
5588}
5589
a412fce0
AD
5590void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5591{
5592 u32 tmp, i, mask;
5593
5594 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5595 WREG32(RLC_GPR_REG2, tmp);
5596
5597 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5598 for (i = 0; i < rdev->usec_timeout; i++) {
5599 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5600 break;
5601 udelay(1);
5602 }
5603
5604 for (i = 0; i < rdev->usec_timeout; i++) {
5605 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5606 break;
5607 udelay(1);
5608 }
5609}
5610
5611void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5612{
5613 u32 tmp;
5614
5615 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5616 WREG32(RLC_GPR_REG2, tmp);
5617}
5618
866d83de
AD
5619/**
5620 * cik_rlc_stop - stop the RLC ME
5621 *
5622 * @rdev: radeon_device pointer
5623 *
5624 * Halt the RLC ME (MicroEngine) (CIK).
5625 */
5626static void cik_rlc_stop(struct radeon_device *rdev)
5627{
22c775ce 5628 WREG32(RLC_CNTL, 0);
866d83de
AD
5629
5630 cik_enable_gui_idle_interrupt(rdev, false);
5631
866d83de
AD
5632 cik_wait_for_rlc_serdes(rdev);
5633}
5634
f6796cae
AD
5635/**
5636 * cik_rlc_start - start the RLC ME
5637 *
5638 * @rdev: radeon_device pointer
5639 *
5640 * Unhalt the RLC ME (MicroEngine) (CIK).
5641 */
5642static void cik_rlc_start(struct radeon_device *rdev)
5643{
f6796cae
AD
5644 WREG32(RLC_CNTL, RLC_ENABLE);
5645
866d83de 5646 cik_enable_gui_idle_interrupt(rdev, true);
f6796cae
AD
5647
5648 udelay(50);
5649}
5650
5651/**
5652 * cik_rlc_resume - setup the RLC hw
5653 *
5654 * @rdev: radeon_device pointer
5655 *
5656 * Initialize the RLC registers, load the ucode,
5657 * and start the RLC (CIK).
5658 * Returns 0 for success, -EINVAL if the ucode is not available.
5659 */
5660static int cik_rlc_resume(struct radeon_device *rdev)
5661{
22c775ce 5662 u32 i, size, tmp;
f6796cae
AD
5663 const __be32 *fw_data;
5664
5665 if (!rdev->rlc_fw)
5666 return -EINVAL;
5667
5668 switch (rdev->family) {
5669 case CHIP_BONAIRE:
5670 default:
5671 size = BONAIRE_RLC_UCODE_SIZE;
5672 break;
5673 case CHIP_KAVERI:
5674 size = KV_RLC_UCODE_SIZE;
5675 break;
5676 case CHIP_KABINI:
5677 size = KB_RLC_UCODE_SIZE;
5678 break;
5679 }
5680
5681 cik_rlc_stop(rdev);
5682
22c775ce
AD
5683 /* disable CG */
5684 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5685 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5686
866d83de 5687 si_rlc_reset(rdev);
f6796cae 5688
22c775ce
AD
5689 cik_init_pg(rdev);
5690
5691 cik_init_cg(rdev);
5692
f6796cae
AD
5693 WREG32(RLC_LB_CNTR_INIT, 0);
5694 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5695
5696 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5697 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5698 WREG32(RLC_LB_PARAMS, 0x00600408);
5699 WREG32(RLC_LB_CNTL, 0x80000004);
5700
5701 WREG32(RLC_MC_CNTL, 0);
5702 WREG32(RLC_UCODE_CNTL, 0);
5703
5704 fw_data = (const __be32 *)rdev->rlc_fw->data;
5705 WREG32(RLC_GPM_UCODE_ADDR, 0);
5706 for (i = 0; i < size; i++)
5707 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5708 WREG32(RLC_GPM_UCODE_ADDR, 0);
5709
866d83de
AD
5710 /* XXX - find out what chips support lbpw */
5711 cik_enable_lbpw(rdev, false);
5712
22c775ce
AD
5713 if (rdev->family == CHIP_BONAIRE)
5714 WREG32(RLC_DRIVER_DMA_STATUS, 0);
f6796cae
AD
5715
5716 cik_rlc_start(rdev);
5717
5718 return 0;
5719}
a59781bb 5720
22c775ce
AD
5721static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5722{
5723 u32 data, orig, tmp, tmp2;
5724
5725 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5726
5727 cik_enable_gui_idle_interrupt(rdev, enable);
5728
5729 if (enable) {
5730 tmp = cik_halt_rlc(rdev);
5731
5732 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5733 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5734 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5735 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5736 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5737
5738 cik_update_rlc(rdev, tmp);
5739
5740 data |= CGCG_EN | CGLS_EN;
5741 } else {
5742 RREG32(CB_CGTT_SCLK_CTRL);
5743 RREG32(CB_CGTT_SCLK_CTRL);
5744 RREG32(CB_CGTT_SCLK_CTRL);
5745 RREG32(CB_CGTT_SCLK_CTRL);
5746
5747 data &= ~(CGCG_EN | CGLS_EN);
5748 }
5749
5750 if (orig != data)
5751 WREG32(RLC_CGCG_CGLS_CTRL, data);
5752
5753}
5754
5755static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5756{
5757 u32 data, orig, tmp = 0;
5758
5759 if (enable) {
5760 orig = data = RREG32(CP_MEM_SLP_CNTL);
5761 data |= CP_MEM_LS_EN;
5762 if (orig != data)
5763 WREG32(CP_MEM_SLP_CNTL, data);
5764
5765 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5766 data &= 0xfffffffd;
5767 if (orig != data)
5768 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5769
5770 tmp = cik_halt_rlc(rdev);
5771
5772 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5773 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5774 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5775 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5776 WREG32(RLC_SERDES_WR_CTRL, data);
5777
5778 cik_update_rlc(rdev, tmp);
5779
5780 orig = data = RREG32(CGTS_SM_CTRL_REG);
5781 data &= ~SM_MODE_MASK;
5782 data |= SM_MODE(0x2);
5783 data |= SM_MODE_ENABLE;
5784 data &= ~CGTS_OVERRIDE;
5785 data &= ~CGTS_LS_OVERRIDE;
5786 data &= ~ON_MONITOR_ADD_MASK;
5787 data |= ON_MONITOR_ADD_EN;
5788 data |= ON_MONITOR_ADD(0x96);
5789 if (orig != data)
5790 WREG32(CGTS_SM_CTRL_REG, data);
5791 } else {
5792 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5793 data |= 0x00000002;
5794 if (orig != data)
5795 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5796
5797 data = RREG32(RLC_MEM_SLP_CNTL);
5798 if (data & RLC_MEM_LS_EN) {
5799 data &= ~RLC_MEM_LS_EN;
5800 WREG32(RLC_MEM_SLP_CNTL, data);
5801 }
5802
5803 data = RREG32(CP_MEM_SLP_CNTL);
5804 if (data & CP_MEM_LS_EN) {
5805 data &= ~CP_MEM_LS_EN;
5806 WREG32(CP_MEM_SLP_CNTL, data);
5807 }
5808
5809 orig = data = RREG32(CGTS_SM_CTRL_REG);
5810 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5811 if (orig != data)
5812 WREG32(CGTS_SM_CTRL_REG, data);
5813
5814 tmp = cik_halt_rlc(rdev);
5815
5816 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5817 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5818 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5819 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5820 WREG32(RLC_SERDES_WR_CTRL, data);
5821
5822 cik_update_rlc(rdev, tmp);
5823 }
5824}
5825
5826static const u32 mc_cg_registers[] =
5827{
5828 MC_HUB_MISC_HUB_CG,
5829 MC_HUB_MISC_SIP_CG,
5830 MC_HUB_MISC_VM_CG,
5831 MC_XPB_CLK_GAT,
5832 ATC_MISC_CG,
5833 MC_CITF_MISC_WR_CG,
5834 MC_CITF_MISC_RD_CG,
5835 MC_CITF_MISC_VM_CG,
5836 VM_L2_CG,
5837};
5838
5839static void cik_enable_mc_ls(struct radeon_device *rdev,
5840 bool enable)
5841{
5842 int i;
5843 u32 orig, data;
5844
5845 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5846 orig = data = RREG32(mc_cg_registers[i]);
5847 if (enable)
5848 data |= MC_LS_ENABLE;
5849 else
5850 data &= ~MC_LS_ENABLE;
5851 if (data != orig)
5852 WREG32(mc_cg_registers[i], data);
5853 }
5854}
5855
5856static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5857 bool enable)
5858{
5859 int i;
5860 u32 orig, data;
5861
5862 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5863 orig = data = RREG32(mc_cg_registers[i]);
5864 if (enable)
5865 data |= MC_CG_ENABLE;
5866 else
5867 data &= ~MC_CG_ENABLE;
5868 if (data != orig)
5869 WREG32(mc_cg_registers[i], data);
5870 }
5871}
5872
5873static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5874 bool enable)
5875{
5876 u32 orig, data;
5877
5878 if (enable) {
5879 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5880 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5881 } else {
5882 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5883 data |= 0xff000000;
5884 if (data != orig)
5885 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5886
5887 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5888 data |= 0xff000000;
5889 if (data != orig)
5890 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5891 }
5892}
5893
5894static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5895 bool enable)
5896{
5897 u32 orig, data;
5898
5899 if (enable) {
5900 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5901 data |= 0x100;
5902 if (orig != data)
5903 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5904
5905 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5906 data |= 0x100;
5907 if (orig != data)
5908 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5909 } else {
5910 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5911 data &= ~0x100;
5912 if (orig != data)
5913 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5914
5915 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5916 data &= ~0x100;
5917 if (orig != data)
5918 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5919 }
5920}
5921
5922static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5923 bool enable)
5924{
5925 u32 orig, data;
5926
5927 if (enable) {
5928 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5929 data = 0xfff;
5930 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5931
5932 orig = data = RREG32(UVD_CGC_CTRL);
5933 data |= DCM;
5934 if (orig != data)
5935 WREG32(UVD_CGC_CTRL, data);
5936 } else {
5937 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5938 data &= ~0xfff;
5939 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5940
5941 orig = data = RREG32(UVD_CGC_CTRL);
5942 data &= ~DCM;
5943 if (orig != data)
5944 WREG32(UVD_CGC_CTRL, data);
5945 }
5946}
5947
5948static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5949 bool enable)
5950{
5951 u32 orig, data;
5952
5953 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5954
5955 if (enable)
5956 data &= ~CLOCK_GATING_DIS;
5957 else
5958 data |= CLOCK_GATING_DIS;
5959
5960 if (orig != data)
5961 WREG32(HDP_HOST_PATH_CNTL, data);
5962}
5963
5964static void cik_enable_hdp_ls(struct radeon_device *rdev,
5965 bool enable)
5966{
5967 u32 orig, data;
5968
5969 orig = data = RREG32(HDP_MEM_POWER_LS);
5970
5971 if (enable)
5972 data |= HDP_LS_ENABLE;
5973 else
5974 data &= ~HDP_LS_ENABLE;
5975
5976 if (orig != data)
5977 WREG32(HDP_MEM_POWER_LS, data);
5978}
5979
5980void cik_update_cg(struct radeon_device *rdev,
5981 u32 block, bool enable)
5982{
5983 if (block & RADEON_CG_BLOCK_GFX) {
5984 /* order matters! */
5985 if (enable) {
5986 cik_enable_mgcg(rdev, true);
5987 cik_enable_cgcg(rdev, true);
5988 } else {
5989 cik_enable_cgcg(rdev, false);
5990 cik_enable_mgcg(rdev, false);
5991 }
5992 }
5993
5994 if (block & RADEON_CG_BLOCK_MC) {
5995 if (!(rdev->flags & RADEON_IS_IGP)) {
5996 cik_enable_mc_mgcg(rdev, enable);
5997 cik_enable_mc_ls(rdev, enable);
5998 }
5999 }
6000
6001 if (block & RADEON_CG_BLOCK_SDMA) {
6002 cik_enable_sdma_mgcg(rdev, enable);
6003 cik_enable_sdma_mgls(rdev, enable);
6004 }
6005
6006 if (block & RADEON_CG_BLOCK_UVD) {
6007 if (rdev->has_uvd)
6008 cik_enable_uvd_mgcg(rdev, enable);
6009 }
6010
6011 if (block & RADEON_CG_BLOCK_HDP) {
6012 cik_enable_hdp_mgcg(rdev, enable);
6013 cik_enable_hdp_ls(rdev, enable);
6014 }
6015}
6016
6017static void cik_init_cg(struct radeon_device *rdev)
6018{
6019
6020 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */
6021
6022 if (rdev->has_uvd)
6023 si_init_uvd_internal_cg(rdev);
6024
6025 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6026 RADEON_CG_BLOCK_SDMA |
6027 RADEON_CG_BLOCK_UVD |
6028 RADEON_CG_BLOCK_HDP), true);
6029}
6030
6031static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6032 bool enable)
6033{
6034 u32 data, orig;
6035
6036 orig = data = RREG32(RLC_PG_CNTL);
6037 if (enable)
6038 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6039 else
6040 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6041 if (orig != data)
6042 WREG32(RLC_PG_CNTL, data);
6043}
6044
6045static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6046 bool enable)
6047{
6048 u32 data, orig;
6049
6050 orig = data = RREG32(RLC_PG_CNTL);
6051 if (enable)
6052 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6053 else
6054 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6055 if (orig != data)
6056 WREG32(RLC_PG_CNTL, data);
6057}
6058
6059static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6060{
6061 u32 data, orig;
6062
6063 orig = data = RREG32(RLC_PG_CNTL);
6064 if (enable)
6065 data &= ~DISABLE_CP_PG;
6066 else
6067 data |= DISABLE_CP_PG;
6068 if (orig != data)
6069 WREG32(RLC_PG_CNTL, data);
6070}
6071
6072static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6073{
6074 u32 data, orig;
6075
6076 orig = data = RREG32(RLC_PG_CNTL);
6077 if (enable)
6078 data &= ~DISABLE_GDS_PG;
6079 else
6080 data |= DISABLE_GDS_PG;
6081 if (orig != data)
6082 WREG32(RLC_PG_CNTL, data);
6083}
6084
6085#define CP_ME_TABLE_SIZE 96
6086#define CP_ME_TABLE_OFFSET 2048
6087#define CP_MEC_TABLE_OFFSET 4096
6088
6089void cik_init_cp_pg_table(struct radeon_device *rdev)
6090{
6091 const __be32 *fw_data;
6092 volatile u32 *dst_ptr;
6093 int me, i, max_me = 4;
6094 u32 bo_offset = 0;
6095 u32 table_offset;
6096
6097 if (rdev->family == CHIP_KAVERI)
6098 max_me = 5;
6099
6100 if (rdev->rlc.cp_table_ptr == NULL)
6101 return;
6102
6103 /* write the cp table buffer */
6104 dst_ptr = rdev->rlc.cp_table_ptr;
6105 for (me = 0; me < max_me; me++) {
6106 if (me == 0) {
6107 fw_data = (const __be32 *)rdev->ce_fw->data;
6108 table_offset = CP_ME_TABLE_OFFSET;
6109 } else if (me == 1) {
6110 fw_data = (const __be32 *)rdev->pfp_fw->data;
6111 table_offset = CP_ME_TABLE_OFFSET;
6112 } else if (me == 2) {
6113 fw_data = (const __be32 *)rdev->me_fw->data;
6114 table_offset = CP_ME_TABLE_OFFSET;
6115 } else {
6116 fw_data = (const __be32 *)rdev->mec_fw->data;
6117 table_offset = CP_MEC_TABLE_OFFSET;
6118 }
6119
6120 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6121 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
6122 }
6123 bo_offset += CP_ME_TABLE_SIZE;
6124 }
6125}
6126
6127static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6128 bool enable)
6129{
6130 u32 data, orig;
6131
6132 if (enable) {
6133 orig = data = RREG32(RLC_PG_CNTL);
6134 data |= GFX_PG_ENABLE;
6135 if (orig != data)
6136 WREG32(RLC_PG_CNTL, data);
6137
6138 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6139 data |= AUTO_PG_EN;
6140 if (orig != data)
6141 WREG32(RLC_AUTO_PG_CTRL, data);
6142 } else {
6143 orig = data = RREG32(RLC_PG_CNTL);
6144 data &= ~GFX_PG_ENABLE;
6145 if (orig != data)
6146 WREG32(RLC_PG_CNTL, data);
6147
6148 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6149 data &= ~AUTO_PG_EN;
6150 if (orig != data)
6151 WREG32(RLC_AUTO_PG_CTRL, data);
6152
6153 data = RREG32(DB_RENDER_CONTROL);
6154 }
6155}
6156
6157static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6158{
6159 u32 mask = 0, tmp, tmp1;
6160 int i;
6161
6162 cik_select_se_sh(rdev, se, sh);
6163 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6164 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6165 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6166
6167 tmp &= 0xffff0000;
6168
6169 tmp |= tmp1;
6170 tmp >>= 16;
6171
6172 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6173 mask <<= 1;
6174 mask |= 1;
6175 }
6176
6177 return (~tmp) & mask;
6178}
6179
6180static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6181{
6182 u32 i, j, k, active_cu_number = 0;
6183 u32 mask, counter, cu_bitmap;
6184 u32 tmp = 0;
6185
6186 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6187 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6188 mask = 1;
6189 cu_bitmap = 0;
6190 counter = 0;
6191 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6192 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6193 if (counter < 2)
6194 cu_bitmap |= mask;
6195 counter ++;
6196 }
6197 mask <<= 1;
6198 }
6199
6200 active_cu_number += counter;
6201 tmp |= (cu_bitmap << (i * 16 + j * 8));
6202 }
6203 }
6204
6205 WREG32(RLC_PG_AO_CU_MASK, tmp);
6206
6207 tmp = RREG32(RLC_MAX_PG_CU);
6208 tmp &= ~MAX_PU_CU_MASK;
6209 tmp |= MAX_PU_CU(active_cu_number);
6210 WREG32(RLC_MAX_PG_CU, tmp);
6211}
6212
6213static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6214 bool enable)
6215{
6216 u32 data, orig;
6217
6218 orig = data = RREG32(RLC_PG_CNTL);
6219 if (enable)
6220 data |= STATIC_PER_CU_PG_ENABLE;
6221 else
6222 data &= ~STATIC_PER_CU_PG_ENABLE;
6223 if (orig != data)
6224 WREG32(RLC_PG_CNTL, data);
6225}
6226
6227static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6228 bool enable)
6229{
6230 u32 data, orig;
6231
6232 orig = data = RREG32(RLC_PG_CNTL);
6233 if (enable)
6234 data |= DYN_PER_CU_PG_ENABLE;
6235 else
6236 data &= ~DYN_PER_CU_PG_ENABLE;
6237 if (orig != data)
6238 WREG32(RLC_PG_CNTL, data);
6239}
6240
6241#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6242#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6243
6244static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6245{
6246 u32 data, orig;
6247 u32 i;
6248
6249 if (rdev->rlc.cs_data) {
6250 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6251 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6252 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
6253 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6254 } else {
6255 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6256 for (i = 0; i < 3; i++)
6257 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6258 }
6259 if (rdev->rlc.reg_list) {
6260 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6261 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6262 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6263 }
6264
6265 orig = data = RREG32(RLC_PG_CNTL);
6266 data |= GFX_PG_SRC;
6267 if (orig != data)
6268 WREG32(RLC_PG_CNTL, data);
6269
6270 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6271 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6272
6273 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6274 data &= ~IDLE_POLL_COUNT_MASK;
6275 data |= IDLE_POLL_COUNT(0x60);
6276 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6277
6278 data = 0x10101010;
6279 WREG32(RLC_PG_DELAY, data);
6280
6281 data = RREG32(RLC_PG_DELAY_2);
6282 data &= ~0xff;
6283 data |= 0x3;
6284 WREG32(RLC_PG_DELAY_2, data);
6285
6286 data = RREG32(RLC_AUTO_PG_CTRL);
6287 data &= ~GRBM_REG_SGIT_MASK;
6288 data |= GRBM_REG_SGIT(0x700);
6289 WREG32(RLC_AUTO_PG_CTRL, data);
6290
6291}
6292
6293static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6294{
6295 bool has_pg = false;
6296 bool has_dyn_mgpg = false;
6297 bool has_static_mgpg = false;
6298
6299 /* only APUs have PG */
6300 if (rdev->flags & RADEON_IS_IGP) {
6301 has_pg = true;
6302 has_static_mgpg = true;
6303 if (rdev->family == CHIP_KAVERI)
6304 has_dyn_mgpg = true;
6305 }
6306
6307 if (has_pg) {
6308 cik_enable_gfx_cgpg(rdev, enable);
6309 if (enable) {
6310 cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
6311 cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
6312 } else {
6313 cik_enable_gfx_static_mgpg(rdev, false);
6314 cik_enable_gfx_dynamic_mgpg(rdev, false);
6315 }
6316 }
6317
6318}
6319
6320void cik_init_pg(struct radeon_device *rdev)
6321{
6322 bool has_pg = false;
6323
6324 /* only APUs have PG */
6325 if (rdev->flags & RADEON_IS_IGP) {
6326 /* XXX disable this for now */
6327 /* has_pg = true; */
6328 }
6329
6330 if (has_pg) {
6331 cik_enable_sck_slowdown_on_pu(rdev, true);
6332 cik_enable_sck_slowdown_on_pd(rdev, true);
6333 cik_init_gfx_cgpg(rdev);
6334 cik_enable_cp_pg(rdev, true);
6335 cik_enable_gds_pg(rdev, true);
6336 cik_init_ao_cu_mask(rdev);
6337 cik_update_gfx_pg(rdev, true);
6338 }
6339}
6340
a59781bb
AD
6341/*
6342 * Interrupts
6343 * Starting with r6xx, interrupts are handled via a ring buffer.
6344 * Ring buffers are areas of GPU accessible memory that the GPU
6345 * writes interrupt vectors into and the host reads vectors out of.
6346 * There is a rptr (read pointer) that determines where the
6347 * host is currently reading, and a wptr (write pointer)
6348 * which determines where the GPU has written. When the
6349 * pointers are equal, the ring is idle. When the GPU
6350 * writes vectors to the ring buffer, it increments the
6351 * wptr. When there is an interrupt, the host then starts
6352 * fetching commands and processing them until the pointers are
6353 * equal again at which point it updates the rptr.
6354 */
6355
6356/**
6357 * cik_enable_interrupts - Enable the interrupt ring buffer
6358 *
6359 * @rdev: radeon_device pointer
6360 *
6361 * Enable the interrupt ring buffer (CIK).
6362 */
6363static void cik_enable_interrupts(struct radeon_device *rdev)
6364{
6365 u32 ih_cntl = RREG32(IH_CNTL);
6366 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6367
6368 ih_cntl |= ENABLE_INTR;
6369 ih_rb_cntl |= IH_RB_ENABLE;
6370 WREG32(IH_CNTL, ih_cntl);
6371 WREG32(IH_RB_CNTL, ih_rb_cntl);
6372 rdev->ih.enabled = true;
6373}
6374
6375/**
6376 * cik_disable_interrupts - Disable the interrupt ring buffer
6377 *
6378 * @rdev: radeon_device pointer
6379 *
6380 * Disable the interrupt ring buffer (CIK).
6381 */
6382static void cik_disable_interrupts(struct radeon_device *rdev)
6383{
6384 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6385 u32 ih_cntl = RREG32(IH_CNTL);
6386
6387 ih_rb_cntl &= ~IH_RB_ENABLE;
6388 ih_cntl &= ~ENABLE_INTR;
6389 WREG32(IH_RB_CNTL, ih_rb_cntl);
6390 WREG32(IH_CNTL, ih_cntl);
6391 /* set rptr, wptr to 0 */
6392 WREG32(IH_RB_RPTR, 0);
6393 WREG32(IH_RB_WPTR, 0);
6394 rdev->ih.enabled = false;
6395 rdev->ih.rptr = 0;
6396}
6397
6398/**
6399 * cik_disable_interrupt_state - Disable all interrupt sources
6400 *
6401 * @rdev: radeon_device pointer
6402 *
6403 * Clear all interrupt enable bits used by the driver (CIK).
6404 */
6405static void cik_disable_interrupt_state(struct radeon_device *rdev)
6406{
6407 u32 tmp;
6408
6409 /* gfx ring */
6410 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
21a93e13
AD
6411 /* sdma */
6412 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6413 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6414 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6415 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
a59781bb
AD
6416 /* compute queues */
6417 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6418 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6419 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6420 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6421 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6422 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6423 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6424 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6425 /* grbm */
6426 WREG32(GRBM_INT_CNTL, 0);
6427 /* vline/vblank, etc. */
6428 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6429 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6430 if (rdev->num_crtc >= 4) {
6431 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6432 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6433 }
6434 if (rdev->num_crtc >= 6) {
6435 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6436 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6437 }
6438
6439 /* dac hotplug */
6440 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6441
6442 /* digital hotplug */
6443 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6444 WREG32(DC_HPD1_INT_CONTROL, tmp);
6445 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6446 WREG32(DC_HPD2_INT_CONTROL, tmp);
6447 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6448 WREG32(DC_HPD3_INT_CONTROL, tmp);
6449 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6450 WREG32(DC_HPD4_INT_CONTROL, tmp);
6451 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6452 WREG32(DC_HPD5_INT_CONTROL, tmp);
6453 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6454 WREG32(DC_HPD6_INT_CONTROL, tmp);
6455
6456}
6457
6458/**
6459 * cik_irq_init - init and enable the interrupt ring
6460 *
6461 * @rdev: radeon_device pointer
6462 *
6463 * Allocate a ring buffer for the interrupt controller,
6464 * enable the RLC, disable interrupts, enable the IH
6465 * ring buffer and enable it (CIK).
6466 * Called at device load and reume.
6467 * Returns 0 for success, errors for failure.
6468 */
6469static int cik_irq_init(struct radeon_device *rdev)
6470{
6471 int ret = 0;
6472 int rb_bufsz;
6473 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6474
6475 /* allocate ring */
6476 ret = r600_ih_ring_alloc(rdev);
6477 if (ret)
6478 return ret;
6479
6480 /* disable irqs */
6481 cik_disable_interrupts(rdev);
6482
6483 /* init rlc */
6484 ret = cik_rlc_resume(rdev);
6485 if (ret) {
6486 r600_ih_ring_fini(rdev);
6487 return ret;
6488 }
6489
6490 /* setup interrupt control */
6491 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6492 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6493 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6494 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6495 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6496 */
6497 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6498 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6499 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6500 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6501
6502 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6503 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
6504
6505 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6506 IH_WPTR_OVERFLOW_CLEAR |
6507 (rb_bufsz << 1));
6508
6509 if (rdev->wb.enabled)
6510 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6511
6512 /* set the writeback address whether it's enabled or not */
6513 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6514 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6515
6516 WREG32(IH_RB_CNTL, ih_rb_cntl);
6517
6518 /* set rptr, wptr to 0 */
6519 WREG32(IH_RB_RPTR, 0);
6520 WREG32(IH_RB_WPTR, 0);
6521
6522 /* Default settings for IH_CNTL (disabled at first) */
6523 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6524 /* RPTR_REARM only works if msi's are enabled */
6525 if (rdev->msi_enabled)
6526 ih_cntl |= RPTR_REARM;
6527 WREG32(IH_CNTL, ih_cntl);
6528
6529 /* force the active interrupt state to all disabled */
6530 cik_disable_interrupt_state(rdev);
6531
6532 pci_set_master(rdev->pdev);
6533
6534 /* enable irqs */
6535 cik_enable_interrupts(rdev);
6536
6537 return ret;
6538}
6539
6540/**
6541 * cik_irq_set - enable/disable interrupt sources
6542 *
6543 * @rdev: radeon_device pointer
6544 *
6545 * Enable interrupt sources on the GPU (vblanks, hpd,
6546 * etc.) (CIK).
6547 * Returns 0 for success, errors for failure.
6548 */
6549int cik_irq_set(struct radeon_device *rdev)
6550{
6551 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6552 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
2b0781a6
AD
6553 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6554 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
a59781bb
AD
6555 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6556 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6557 u32 grbm_int_cntl = 0;
21a93e13 6558 u32 dma_cntl, dma_cntl1;
a59781bb
AD
6559
6560 if (!rdev->irq.installed) {
6561 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6562 return -EINVAL;
6563 }
6564 /* don't enable anything if the ih is disabled */
6565 if (!rdev->ih.enabled) {
6566 cik_disable_interrupts(rdev);
6567 /* force the active interrupt state to all disabled */
6568 cik_disable_interrupt_state(rdev);
6569 return 0;
6570 }
6571
6572 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6573 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6574 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6575 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6576 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6577 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6578
21a93e13
AD
6579 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6580 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6581
2b0781a6
AD
6582 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6583 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6584 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6585 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6586 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6587 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6588 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6589 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6590
a59781bb
AD
6591 /* enable CP interrupts on all rings */
6592 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6593 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6594 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6595 }
2b0781a6
AD
6596 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6597 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6598 DRM_DEBUG("si_irq_set: sw int cp1\n");
6599 if (ring->me == 1) {
6600 switch (ring->pipe) {
6601 case 0:
6602 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6603 break;
6604 case 1:
6605 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6606 break;
6607 case 2:
6608 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6609 break;
6610 case 3:
6611 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6612 break;
6613 default:
6614 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6615 break;
6616 }
6617 } else if (ring->me == 2) {
6618 switch (ring->pipe) {
6619 case 0:
6620 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6621 break;
6622 case 1:
6623 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6624 break;
6625 case 2:
6626 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6627 break;
6628 case 3:
6629 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6630 break;
6631 default:
6632 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6633 break;
6634 }
6635 } else {
6636 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6637 }
6638 }
6639 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6640 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6641 DRM_DEBUG("si_irq_set: sw int cp2\n");
6642 if (ring->me == 1) {
6643 switch (ring->pipe) {
6644 case 0:
6645 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6646 break;
6647 case 1:
6648 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6649 break;
6650 case 2:
6651 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6652 break;
6653 case 3:
6654 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6655 break;
6656 default:
6657 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6658 break;
6659 }
6660 } else if (ring->me == 2) {
6661 switch (ring->pipe) {
6662 case 0:
6663 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6664 break;
6665 case 1:
6666 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6667 break;
6668 case 2:
6669 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6670 break;
6671 case 3:
6672 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6673 break;
6674 default:
6675 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6676 break;
6677 }
6678 } else {
6679 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6680 }
6681 }
a59781bb 6682
21a93e13
AD
6683 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6684 DRM_DEBUG("cik_irq_set: sw int dma\n");
6685 dma_cntl |= TRAP_ENABLE;
6686 }
6687
6688 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6689 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6690 dma_cntl1 |= TRAP_ENABLE;
6691 }
6692
a59781bb
AD
6693 if (rdev->irq.crtc_vblank_int[0] ||
6694 atomic_read(&rdev->irq.pflip[0])) {
6695 DRM_DEBUG("cik_irq_set: vblank 0\n");
6696 crtc1 |= VBLANK_INTERRUPT_MASK;
6697 }
6698 if (rdev->irq.crtc_vblank_int[1] ||
6699 atomic_read(&rdev->irq.pflip[1])) {
6700 DRM_DEBUG("cik_irq_set: vblank 1\n");
6701 crtc2 |= VBLANK_INTERRUPT_MASK;
6702 }
6703 if (rdev->irq.crtc_vblank_int[2] ||
6704 atomic_read(&rdev->irq.pflip[2])) {
6705 DRM_DEBUG("cik_irq_set: vblank 2\n");
6706 crtc3 |= VBLANK_INTERRUPT_MASK;
6707 }
6708 if (rdev->irq.crtc_vblank_int[3] ||
6709 atomic_read(&rdev->irq.pflip[3])) {
6710 DRM_DEBUG("cik_irq_set: vblank 3\n");
6711 crtc4 |= VBLANK_INTERRUPT_MASK;
6712 }
6713 if (rdev->irq.crtc_vblank_int[4] ||
6714 atomic_read(&rdev->irq.pflip[4])) {
6715 DRM_DEBUG("cik_irq_set: vblank 4\n");
6716 crtc5 |= VBLANK_INTERRUPT_MASK;
6717 }
6718 if (rdev->irq.crtc_vblank_int[5] ||
6719 atomic_read(&rdev->irq.pflip[5])) {
6720 DRM_DEBUG("cik_irq_set: vblank 5\n");
6721 crtc6 |= VBLANK_INTERRUPT_MASK;
6722 }
6723 if (rdev->irq.hpd[0]) {
6724 DRM_DEBUG("cik_irq_set: hpd 1\n");
6725 hpd1 |= DC_HPDx_INT_EN;
6726 }
6727 if (rdev->irq.hpd[1]) {
6728 DRM_DEBUG("cik_irq_set: hpd 2\n");
6729 hpd2 |= DC_HPDx_INT_EN;
6730 }
6731 if (rdev->irq.hpd[2]) {
6732 DRM_DEBUG("cik_irq_set: hpd 3\n");
6733 hpd3 |= DC_HPDx_INT_EN;
6734 }
6735 if (rdev->irq.hpd[3]) {
6736 DRM_DEBUG("cik_irq_set: hpd 4\n");
6737 hpd4 |= DC_HPDx_INT_EN;
6738 }
6739 if (rdev->irq.hpd[4]) {
6740 DRM_DEBUG("cik_irq_set: hpd 5\n");
6741 hpd5 |= DC_HPDx_INT_EN;
6742 }
6743 if (rdev->irq.hpd[5]) {
6744 DRM_DEBUG("cik_irq_set: hpd 6\n");
6745 hpd6 |= DC_HPDx_INT_EN;
6746 }
6747
6748 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6749
21a93e13
AD
6750 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6751 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6752
2b0781a6
AD
6753 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6754 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6755 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6756 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6757 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6758 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6759 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6760 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6761
a59781bb
AD
6762 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6763
6764 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6765 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6766 if (rdev->num_crtc >= 4) {
6767 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6768 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6769 }
6770 if (rdev->num_crtc >= 6) {
6771 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6772 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6773 }
6774
6775 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6776 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6777 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6778 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6779 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6780 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6781
6782 return 0;
6783}
6784
6785/**
6786 * cik_irq_ack - ack interrupt sources
6787 *
6788 * @rdev: radeon_device pointer
6789 *
6790 * Ack interrupt sources on the GPU (vblanks, hpd,
6791 * etc.) (CIK). Certain interrupts sources are sw
6792 * generated and do not require an explicit ack.
6793 */
6794static inline void cik_irq_ack(struct radeon_device *rdev)
6795{
6796 u32 tmp;
6797
6798 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6799 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6800 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6801 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6802 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6803 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6804 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6805
6806 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6807 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6808 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6809 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6810 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6811 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6812 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6813 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6814
6815 if (rdev->num_crtc >= 4) {
6816 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6817 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6818 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6819 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6820 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6821 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6822 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6823 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6824 }
6825
6826 if (rdev->num_crtc >= 6) {
6827 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6828 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6829 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6830 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6831 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6832 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6833 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6834 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6835 }
6836
6837 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6838 tmp = RREG32(DC_HPD1_INT_CONTROL);
6839 tmp |= DC_HPDx_INT_ACK;
6840 WREG32(DC_HPD1_INT_CONTROL, tmp);
6841 }
6842 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6843 tmp = RREG32(DC_HPD2_INT_CONTROL);
6844 tmp |= DC_HPDx_INT_ACK;
6845 WREG32(DC_HPD2_INT_CONTROL, tmp);
6846 }
6847 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6848 tmp = RREG32(DC_HPD3_INT_CONTROL);
6849 tmp |= DC_HPDx_INT_ACK;
6850 WREG32(DC_HPD3_INT_CONTROL, tmp);
6851 }
6852 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6853 tmp = RREG32(DC_HPD4_INT_CONTROL);
6854 tmp |= DC_HPDx_INT_ACK;
6855 WREG32(DC_HPD4_INT_CONTROL, tmp);
6856 }
6857 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6858 tmp = RREG32(DC_HPD5_INT_CONTROL);
6859 tmp |= DC_HPDx_INT_ACK;
6860 WREG32(DC_HPD5_INT_CONTROL, tmp);
6861 }
6862 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6863 tmp = RREG32(DC_HPD5_INT_CONTROL);
6864 tmp |= DC_HPDx_INT_ACK;
6865 WREG32(DC_HPD6_INT_CONTROL, tmp);
6866 }
6867}
6868
6869/**
6870 * cik_irq_disable - disable interrupts
6871 *
6872 * @rdev: radeon_device pointer
6873 *
6874 * Disable interrupts on the hw (CIK).
6875 */
6876static void cik_irq_disable(struct radeon_device *rdev)
6877{
6878 cik_disable_interrupts(rdev);
6879 /* Wait and acknowledge irq */
6880 mdelay(1);
6881 cik_irq_ack(rdev);
6882 cik_disable_interrupt_state(rdev);
6883}
6884
6885/**
6886 * cik_irq_disable - disable interrupts for suspend
6887 *
6888 * @rdev: radeon_device pointer
6889 *
6890 * Disable interrupts and stop the RLC (CIK).
6891 * Used for suspend.
6892 */
6893static void cik_irq_suspend(struct radeon_device *rdev)
6894{
6895 cik_irq_disable(rdev);
6896 cik_rlc_stop(rdev);
6897}
6898
6899/**
6900 * cik_irq_fini - tear down interrupt support
6901 *
6902 * @rdev: radeon_device pointer
6903 *
6904 * Disable interrupts on the hw and free the IH ring
6905 * buffer (CIK).
6906 * Used for driver unload.
6907 */
6908static void cik_irq_fini(struct radeon_device *rdev)
6909{
6910 cik_irq_suspend(rdev);
6911 r600_ih_ring_fini(rdev);
6912}
6913
6914/**
6915 * cik_get_ih_wptr - get the IH ring buffer wptr
6916 *
6917 * @rdev: radeon_device pointer
6918 *
6919 * Get the IH ring buffer wptr from either the register
6920 * or the writeback memory buffer (CIK). Also check for
6921 * ring buffer overflow and deal with it.
6922 * Used by cik_irq_process().
6923 * Returns the value of the wptr.
6924 */
6925static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6926{
6927 u32 wptr, tmp;
6928
6929 if (rdev->wb.enabled)
6930 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6931 else
6932 wptr = RREG32(IH_RB_WPTR);
6933
6934 if (wptr & RB_OVERFLOW) {
6935 /* When a ring buffer overflow happen start parsing interrupt
6936 * from the last not overwritten vector (wptr + 16). Hopefully
6937 * this should allow us to catchup.
6938 */
6939 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6940 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6941 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6942 tmp = RREG32(IH_RB_CNTL);
6943 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6944 WREG32(IH_RB_CNTL, tmp);
6945 }
6946 return (wptr & rdev->ih.ptr_mask);
6947}
6948
6949/* CIK IV Ring
6950 * Each IV ring entry is 128 bits:
6951 * [7:0] - interrupt source id
6952 * [31:8] - reserved
6953 * [59:32] - interrupt source data
6954 * [63:60] - reserved
21a93e13
AD
6955 * [71:64] - RINGID
6956 * CP:
6957 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
a59781bb
AD
6958 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6959 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6960 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6961 * PIPE_ID - ME0 0=3D
6962 * - ME1&2 compute dispatcher (4 pipes each)
21a93e13
AD
6963 * SDMA:
6964 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6965 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6966 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
a59781bb
AD
6967 * [79:72] - VMID
6968 * [95:80] - PASID
6969 * [127:96] - reserved
6970 */
6971/**
6972 * cik_irq_process - interrupt handler
6973 *
6974 * @rdev: radeon_device pointer
6975 *
6976 * Interrupt hander (CIK). Walk the IH ring,
6977 * ack interrupts and schedule work to handle
6978 * interrupt events.
6979 * Returns irq process return code.
6980 */
6981int cik_irq_process(struct radeon_device *rdev)
6982{
2b0781a6
AD
6983 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6984 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
a59781bb
AD
6985 u32 wptr;
6986 u32 rptr;
6987 u32 src_id, src_data, ring_id;
6988 u8 me_id, pipe_id, queue_id;
6989 u32 ring_index;
6990 bool queue_hotplug = false;
6991 bool queue_reset = false;
3ec7d11b 6992 u32 addr, status, mc_client;
a59781bb
AD
6993
6994 if (!rdev->ih.enabled || rdev->shutdown)
6995 return IRQ_NONE;
6996
6997 wptr = cik_get_ih_wptr(rdev);
6998
6999restart_ih:
7000 /* is somebody else already processing irqs? */
7001 if (atomic_xchg(&rdev->ih.lock, 1))
7002 return IRQ_NONE;
7003
7004 rptr = rdev->ih.rptr;
7005 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7006
7007 /* Order reading of wptr vs. reading of IH ring data */
7008 rmb();
7009
7010 /* display interrupts */
7011 cik_irq_ack(rdev);
7012
7013 while (rptr != wptr) {
7014 /* wptr/rptr are in bytes! */
7015 ring_index = rptr / 4;
7016 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7017 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7018 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
a59781bb
AD
7019
7020 switch (src_id) {
7021 case 1: /* D1 vblank/vline */
7022 switch (src_data) {
7023 case 0: /* D1 vblank */
7024 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7025 if (rdev->irq.crtc_vblank_int[0]) {
7026 drm_handle_vblank(rdev->ddev, 0);
7027 rdev->pm.vblank_sync = true;
7028 wake_up(&rdev->irq.vblank_queue);
7029 }
7030 if (atomic_read(&rdev->irq.pflip[0]))
7031 radeon_crtc_handle_flip(rdev, 0);
7032 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7033 DRM_DEBUG("IH: D1 vblank\n");
7034 }
7035 break;
7036 case 1: /* D1 vline */
7037 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7038 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7039 DRM_DEBUG("IH: D1 vline\n");
7040 }
7041 break;
7042 default:
7043 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7044 break;
7045 }
7046 break;
7047 case 2: /* D2 vblank/vline */
7048 switch (src_data) {
7049 case 0: /* D2 vblank */
7050 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7051 if (rdev->irq.crtc_vblank_int[1]) {
7052 drm_handle_vblank(rdev->ddev, 1);
7053 rdev->pm.vblank_sync = true;
7054 wake_up(&rdev->irq.vblank_queue);
7055 }
7056 if (atomic_read(&rdev->irq.pflip[1]))
7057 radeon_crtc_handle_flip(rdev, 1);
7058 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7059 DRM_DEBUG("IH: D2 vblank\n");
7060 }
7061 break;
7062 case 1: /* D2 vline */
7063 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7064 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7065 DRM_DEBUG("IH: D2 vline\n");
7066 }
7067 break;
7068 default:
7069 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7070 break;
7071 }
7072 break;
7073 case 3: /* D3 vblank/vline */
7074 switch (src_data) {
7075 case 0: /* D3 vblank */
7076 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7077 if (rdev->irq.crtc_vblank_int[2]) {
7078 drm_handle_vblank(rdev->ddev, 2);
7079 rdev->pm.vblank_sync = true;
7080 wake_up(&rdev->irq.vblank_queue);
7081 }
7082 if (atomic_read(&rdev->irq.pflip[2]))
7083 radeon_crtc_handle_flip(rdev, 2);
7084 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7085 DRM_DEBUG("IH: D3 vblank\n");
7086 }
7087 break;
7088 case 1: /* D3 vline */
7089 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7090 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7091 DRM_DEBUG("IH: D3 vline\n");
7092 }
7093 break;
7094 default:
7095 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7096 break;
7097 }
7098 break;
7099 case 4: /* D4 vblank/vline */
7100 switch (src_data) {
7101 case 0: /* D4 vblank */
7102 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7103 if (rdev->irq.crtc_vblank_int[3]) {
7104 drm_handle_vblank(rdev->ddev, 3);
7105 rdev->pm.vblank_sync = true;
7106 wake_up(&rdev->irq.vblank_queue);
7107 }
7108 if (atomic_read(&rdev->irq.pflip[3]))
7109 radeon_crtc_handle_flip(rdev, 3);
7110 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7111 DRM_DEBUG("IH: D4 vblank\n");
7112 }
7113 break;
7114 case 1: /* D4 vline */
7115 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7116 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7117 DRM_DEBUG("IH: D4 vline\n");
7118 }
7119 break;
7120 default:
7121 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7122 break;
7123 }
7124 break;
7125 case 5: /* D5 vblank/vline */
7126 switch (src_data) {
7127 case 0: /* D5 vblank */
7128 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7129 if (rdev->irq.crtc_vblank_int[4]) {
7130 drm_handle_vblank(rdev->ddev, 4);
7131 rdev->pm.vblank_sync = true;
7132 wake_up(&rdev->irq.vblank_queue);
7133 }
7134 if (atomic_read(&rdev->irq.pflip[4]))
7135 radeon_crtc_handle_flip(rdev, 4);
7136 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7137 DRM_DEBUG("IH: D5 vblank\n");
7138 }
7139 break;
7140 case 1: /* D5 vline */
7141 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7142 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7143 DRM_DEBUG("IH: D5 vline\n");
7144 }
7145 break;
7146 default:
7147 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7148 break;
7149 }
7150 break;
7151 case 6: /* D6 vblank/vline */
7152 switch (src_data) {
7153 case 0: /* D6 vblank */
7154 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7155 if (rdev->irq.crtc_vblank_int[5]) {
7156 drm_handle_vblank(rdev->ddev, 5);
7157 rdev->pm.vblank_sync = true;
7158 wake_up(&rdev->irq.vblank_queue);
7159 }
7160 if (atomic_read(&rdev->irq.pflip[5]))
7161 radeon_crtc_handle_flip(rdev, 5);
7162 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7163 DRM_DEBUG("IH: D6 vblank\n");
7164 }
7165 break;
7166 case 1: /* D6 vline */
7167 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7168 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7169 DRM_DEBUG("IH: D6 vline\n");
7170 }
7171 break;
7172 default:
7173 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7174 break;
7175 }
7176 break;
7177 case 42: /* HPD hotplug */
7178 switch (src_data) {
7179 case 0:
7180 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7181 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7182 queue_hotplug = true;
7183 DRM_DEBUG("IH: HPD1\n");
7184 }
7185 break;
7186 case 1:
7187 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7188 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7189 queue_hotplug = true;
7190 DRM_DEBUG("IH: HPD2\n");
7191 }
7192 break;
7193 case 2:
7194 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7195 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7196 queue_hotplug = true;
7197 DRM_DEBUG("IH: HPD3\n");
7198 }
7199 break;
7200 case 3:
7201 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7202 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7203 queue_hotplug = true;
7204 DRM_DEBUG("IH: HPD4\n");
7205 }
7206 break;
7207 case 4:
7208 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7209 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7210 queue_hotplug = true;
7211 DRM_DEBUG("IH: HPD5\n");
7212 }
7213 break;
7214 case 5:
7215 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7216 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7217 queue_hotplug = true;
7218 DRM_DEBUG("IH: HPD6\n");
7219 }
7220 break;
7221 default:
7222 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7223 break;
7224 }
7225 break;
9d97c99b
AD
7226 case 146:
7227 case 147:
3ec7d11b
AD
7228 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7229 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7230 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
9d97c99b
AD
7231 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7232 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3ec7d11b 7233 addr);
9d97c99b 7234 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3ec7d11b
AD
7235 status);
7236 cik_vm_decode_fault(rdev, status, addr, mc_client);
9d97c99b
AD
7237 /* reset addr and status */
7238 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7239 break;
a59781bb
AD
7240 case 176: /* GFX RB CP_INT */
7241 case 177: /* GFX IB CP_INT */
7242 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7243 break;
7244 case 181: /* CP EOP event */
7245 DRM_DEBUG("IH: CP EOP\n");
21a93e13
AD
7246 /* XXX check the bitfield order! */
7247 me_id = (ring_id & 0x60) >> 5;
7248 pipe_id = (ring_id & 0x18) >> 3;
7249 queue_id = (ring_id & 0x7) >> 0;
a59781bb
AD
7250 switch (me_id) {
7251 case 0:
7252 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7253 break;
7254 case 1:
a59781bb 7255 case 2:
2b0781a6
AD
7256 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7257 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7258 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7259 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
a59781bb
AD
7260 break;
7261 }
7262 break;
7263 case 184: /* CP Privileged reg access */
7264 DRM_ERROR("Illegal register access in command stream\n");
7265 /* XXX check the bitfield order! */
7266 me_id = (ring_id & 0x60) >> 5;
7267 pipe_id = (ring_id & 0x18) >> 3;
7268 queue_id = (ring_id & 0x7) >> 0;
7269 switch (me_id) {
7270 case 0:
7271 /* This results in a full GPU reset, but all we need to do is soft
7272 * reset the CP for gfx
7273 */
7274 queue_reset = true;
7275 break;
7276 case 1:
7277 /* XXX compute */
2b0781a6 7278 queue_reset = true;
a59781bb
AD
7279 break;
7280 case 2:
7281 /* XXX compute */
2b0781a6 7282 queue_reset = true;
a59781bb
AD
7283 break;
7284 }
7285 break;
7286 case 185: /* CP Privileged inst */
7287 DRM_ERROR("Illegal instruction in command stream\n");
21a93e13
AD
7288 /* XXX check the bitfield order! */
7289 me_id = (ring_id & 0x60) >> 5;
7290 pipe_id = (ring_id & 0x18) >> 3;
7291 queue_id = (ring_id & 0x7) >> 0;
a59781bb
AD
7292 switch (me_id) {
7293 case 0:
7294 /* This results in a full GPU reset, but all we need to do is soft
7295 * reset the CP for gfx
7296 */
7297 queue_reset = true;
7298 break;
7299 case 1:
7300 /* XXX compute */
2b0781a6 7301 queue_reset = true;
a59781bb
AD
7302 break;
7303 case 2:
7304 /* XXX compute */
2b0781a6 7305 queue_reset = true;
a59781bb
AD
7306 break;
7307 }
7308 break;
21a93e13
AD
7309 case 224: /* SDMA trap event */
7310 /* XXX check the bitfield order! */
7311 me_id = (ring_id & 0x3) >> 0;
7312 queue_id = (ring_id & 0xc) >> 2;
7313 DRM_DEBUG("IH: SDMA trap\n");
7314 switch (me_id) {
7315 case 0:
7316 switch (queue_id) {
7317 case 0:
7318 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7319 break;
7320 case 1:
7321 /* XXX compute */
7322 break;
7323 case 2:
7324 /* XXX compute */
7325 break;
7326 }
7327 break;
7328 case 1:
7329 switch (queue_id) {
7330 case 0:
7331 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7332 break;
7333 case 1:
7334 /* XXX compute */
7335 break;
7336 case 2:
7337 /* XXX compute */
7338 break;
7339 }
7340 break;
7341 }
7342 break;
7343 case 241: /* SDMA Privileged inst */
7344 case 247: /* SDMA Privileged inst */
7345 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7346 /* XXX check the bitfield order! */
7347 me_id = (ring_id & 0x3) >> 0;
7348 queue_id = (ring_id & 0xc) >> 2;
7349 switch (me_id) {
7350 case 0:
7351 switch (queue_id) {
7352 case 0:
7353 queue_reset = true;
7354 break;
7355 case 1:
7356 /* XXX compute */
7357 queue_reset = true;
7358 break;
7359 case 2:
7360 /* XXX compute */
7361 queue_reset = true;
7362 break;
7363 }
7364 break;
7365 case 1:
7366 switch (queue_id) {
7367 case 0:
7368 queue_reset = true;
7369 break;
7370 case 1:
7371 /* XXX compute */
7372 queue_reset = true;
7373 break;
7374 case 2:
7375 /* XXX compute */
7376 queue_reset = true;
7377 break;
7378 }
7379 break;
7380 }
7381 break;
a59781bb
AD
7382 case 233: /* GUI IDLE */
7383 DRM_DEBUG("IH: GUI idle\n");
7384 break;
7385 default:
7386 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7387 break;
7388 }
7389
7390 /* wptr/rptr are in bytes! */
7391 rptr += 16;
7392 rptr &= rdev->ih.ptr_mask;
7393 }
7394 if (queue_hotplug)
7395 schedule_work(&rdev->hotplug_work);
7396 if (queue_reset)
7397 schedule_work(&rdev->reset_work);
7398 rdev->ih.rptr = rptr;
7399 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7400 atomic_set(&rdev->ih.lock, 0);
7401
7402 /* make sure wptr hasn't changed while processing */
7403 wptr = cik_get_ih_wptr(rdev);
7404 if (wptr != rptr)
7405 goto restart_ih;
7406
7407 return IRQ_HANDLED;
7408}
7bf94a2c
AD
7409
7410/*
7411 * startup/shutdown callbacks
7412 */
7413/**
7414 * cik_startup - program the asic to a functional state
7415 *
7416 * @rdev: radeon_device pointer
7417 *
7418 * Programs the asic to a functional state (CIK).
7419 * Called by cik_init() and cik_resume().
7420 * Returns 0 for success, error for failure.
7421 */
7422static int cik_startup(struct radeon_device *rdev)
7423{
7424 struct radeon_ring *ring;
7425 int r;
7426
8a7cd276
AD
7427 /* enable pcie gen2/3 link */
7428 cik_pcie_gen3_enable(rdev);
7235711a
AD
7429 /* enable aspm */
7430 cik_program_aspm(rdev);
8a7cd276 7431
6fab3feb
AD
7432 cik_mc_program(rdev);
7433
7bf94a2c
AD
7434 if (rdev->flags & RADEON_IS_IGP) {
7435 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7436 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7437 r = cik_init_microcode(rdev);
7438 if (r) {
7439 DRM_ERROR("Failed to load firmware!\n");
7440 return r;
7441 }
7442 }
7443 } else {
7444 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7445 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7446 !rdev->mc_fw) {
7447 r = cik_init_microcode(rdev);
7448 if (r) {
7449 DRM_ERROR("Failed to load firmware!\n");
7450 return r;
7451 }
7452 }
7453
7454 r = ci_mc_load_microcode(rdev);
7455 if (r) {
7456 DRM_ERROR("Failed to load MC firmware!\n");
7457 return r;
7458 }
7459 }
7460
7461 r = r600_vram_scratch_init(rdev);
7462 if (r)
7463 return r;
7464
7bf94a2c
AD
7465 r = cik_pcie_gart_enable(rdev);
7466 if (r)
7467 return r;
7468 cik_gpu_init(rdev);
7469
7470 /* allocate rlc buffers */
22c775ce
AD
7471 if (rdev->flags & RADEON_IS_IGP) {
7472 if (rdev->family == CHIP_KAVERI) {
7473 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7474 rdev->rlc.reg_list_size =
7475 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7476 } else {
7477 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7478 rdev->rlc.reg_list_size =
7479 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7480 }
7481 }
7482 rdev->rlc.cs_data = ci_cs_data;
7483 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
1fd11777 7484 r = sumo_rlc_init(rdev);
7bf94a2c
AD
7485 if (r) {
7486 DRM_ERROR("Failed to init rlc BOs!\n");
7487 return r;
7488 }
7489
7490 /* allocate wb buffer */
7491 r = radeon_wb_init(rdev);
7492 if (r)
7493 return r;
7494
963e81f9
AD
7495 /* allocate mec buffers */
7496 r = cik_mec_init(rdev);
7497 if (r) {
7498 DRM_ERROR("Failed to init MEC BOs!\n");
7499 return r;
7500 }
7501
7bf94a2c
AD
7502 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7503 if (r) {
7504 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7505 return r;
7506 }
7507
963e81f9
AD
7508 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7509 if (r) {
7510 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7511 return r;
7512 }
7513
7514 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7515 if (r) {
7516 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7517 return r;
7518 }
7519
7bf94a2c
AD
7520 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7521 if (r) {
7522 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7523 return r;
7524 }
7525
7526 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7527 if (r) {
7528 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7529 return r;
7530 }
7531
87167bb1
CK
7532 r = cik_uvd_resume(rdev);
7533 if (!r) {
7534 r = radeon_fence_driver_start_ring(rdev,
7535 R600_RING_TYPE_UVD_INDEX);
7536 if (r)
7537 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7538 }
7539 if (r)
7540 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7541
7bf94a2c
AD
7542 /* Enable IRQ */
7543 if (!rdev->irq.installed) {
7544 r = radeon_irq_kms_init(rdev);
7545 if (r)
7546 return r;
7547 }
7548
7549 r = cik_irq_init(rdev);
7550 if (r) {
7551 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7552 radeon_irq_kms_fini(rdev);
7553 return r;
7554 }
7555 cik_irq_set(rdev);
7556
7557 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7558 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7559 CP_RB0_RPTR, CP_RB0_WPTR,
7560 0, 0xfffff, RADEON_CP_PACKET2);
7561 if (r)
7562 return r;
7563
963e81f9 7564 /* set up the compute queues */
2615b53a 7565 /* type-2 packets are deprecated on MEC, use type-3 instead */
963e81f9
AD
7566 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7567 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7568 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
2615b53a 7569 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
963e81f9
AD
7570 if (r)
7571 return r;
7572 ring->me = 1; /* first MEC */
7573 ring->pipe = 0; /* first pipe */
7574 ring->queue = 0; /* first queue */
7575 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7576
2615b53a 7577 /* type-2 packets are deprecated on MEC, use type-3 instead */
963e81f9
AD
7578 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7579 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7580 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
2615b53a 7581 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
963e81f9
AD
7582 if (r)
7583 return r;
7584 /* dGPU only have 1 MEC */
7585 ring->me = 1; /* first MEC */
7586 ring->pipe = 0; /* first pipe */
7587 ring->queue = 1; /* second queue */
7588 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7589
7bf94a2c
AD
7590 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7591 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7592 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7593 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7594 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7595 if (r)
7596 return r;
7597
7598 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7599 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7600 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7601 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7602 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7603 if (r)
7604 return r;
7605
7606 r = cik_cp_resume(rdev);
7607 if (r)
7608 return r;
7609
7610 r = cik_sdma_resume(rdev);
7611 if (r)
7612 return r;
7613
87167bb1
CK
7614 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7615 if (ring->ring_size) {
7616 r = radeon_ring_init(rdev, ring, ring->ring_size,
7617 R600_WB_UVD_RPTR_OFFSET,
7618 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7619 0, 0xfffff, RADEON_CP_PACKET2);
7620 if (!r)
7621 r = r600_uvd_init(rdev);
7622 if (r)
7623 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7624 }
7625
7bf94a2c
AD
7626 r = radeon_ib_pool_init(rdev);
7627 if (r) {
7628 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7629 return r;
7630 }
7631
7632 r = radeon_vm_manager_init(rdev);
7633 if (r) {
7634 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7635 return r;
7636 }
7637
7638 return 0;
7639}
7640
7641/**
7642 * cik_resume - resume the asic to a functional state
7643 *
7644 * @rdev: radeon_device pointer
7645 *
7646 * Programs the asic to a functional state (CIK).
7647 * Called at resume.
7648 * Returns 0 for success, error for failure.
7649 */
7650int cik_resume(struct radeon_device *rdev)
7651{
7652 int r;
7653
7654 /* post card */
7655 atom_asic_init(rdev->mode_info.atom_context);
7656
0aafd313
AD
7657 /* init golden registers */
7658 cik_init_golden_registers(rdev);
7659
7bf94a2c
AD
7660 rdev->accel_working = true;
7661 r = cik_startup(rdev);
7662 if (r) {
7663 DRM_ERROR("cik startup failed on resume\n");
7664 rdev->accel_working = false;
7665 return r;
7666 }
7667
7668 return r;
7669
7670}
7671
7672/**
7673 * cik_suspend - suspend the asic
7674 *
7675 * @rdev: radeon_device pointer
7676 *
7677 * Bring the chip into a state suitable for suspend (CIK).
7678 * Called at suspend.
7679 * Returns 0 for success.
7680 */
7681int cik_suspend(struct radeon_device *rdev)
7682{
7683 radeon_vm_manager_fini(rdev);
7684 cik_cp_enable(rdev, false);
7685 cik_sdma_enable(rdev, false);
2858c00d 7686 r600_uvd_stop(rdev);
87167bb1 7687 radeon_uvd_suspend(rdev);
7bf94a2c
AD
7688 cik_irq_suspend(rdev);
7689 radeon_wb_disable(rdev);
7690 cik_pcie_gart_disable(rdev);
7691 return 0;
7692}
7693
7694/* Plan is to move initialization in that function and use
7695 * helper function so that radeon_device_init pretty much
7696 * do nothing more than calling asic specific function. This
7697 * should also allow to remove a bunch of callback function
7698 * like vram_info.
7699 */
7700/**
7701 * cik_init - asic specific driver and hw init
7702 *
7703 * @rdev: radeon_device pointer
7704 *
7705 * Setup asic specific driver variables and program the hw
7706 * to a functional state (CIK).
7707 * Called at driver startup.
7708 * Returns 0 for success, errors for failure.
7709 */
7710int cik_init(struct radeon_device *rdev)
7711{
7712 struct radeon_ring *ring;
7713 int r;
7714
7715 /* Read BIOS */
7716 if (!radeon_get_bios(rdev)) {
7717 if (ASIC_IS_AVIVO(rdev))
7718 return -EINVAL;
7719 }
7720 /* Must be an ATOMBIOS */
7721 if (!rdev->is_atom_bios) {
7722 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7723 return -EINVAL;
7724 }
7725 r = radeon_atombios_init(rdev);
7726 if (r)
7727 return r;
7728
7729 /* Post card if necessary */
7730 if (!radeon_card_posted(rdev)) {
7731 if (!rdev->bios) {
7732 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7733 return -EINVAL;
7734 }
7735 DRM_INFO("GPU not posted. posting now...\n");
7736 atom_asic_init(rdev->mode_info.atom_context);
7737 }
0aafd313
AD
7738 /* init golden registers */
7739 cik_init_golden_registers(rdev);
7bf94a2c
AD
7740 /* Initialize scratch registers */
7741 cik_scratch_init(rdev);
7742 /* Initialize surface registers */
7743 radeon_surface_init(rdev);
7744 /* Initialize clocks */
7745 radeon_get_clock_info(rdev->ddev);
7746
7747 /* Fence driver */
7748 r = radeon_fence_driver_init(rdev);
7749 if (r)
7750 return r;
7751
7752 /* initialize memory controller */
7753 r = cik_mc_init(rdev);
7754 if (r)
7755 return r;
7756 /* Memory manager */
7757 r = radeon_bo_init(rdev);
7758 if (r)
7759 return r;
7760
7761 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7762 ring->ring_obj = NULL;
7763 r600_ring_init(rdev, ring, 1024 * 1024);
7764
963e81f9
AD
7765 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7766 ring->ring_obj = NULL;
7767 r600_ring_init(rdev, ring, 1024 * 1024);
7768 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7769 if (r)
7770 return r;
7771
7772 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7773 ring->ring_obj = NULL;
7774 r600_ring_init(rdev, ring, 1024 * 1024);
7775 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7776 if (r)
7777 return r;
7778
7bf94a2c
AD
7779 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7780 ring->ring_obj = NULL;
7781 r600_ring_init(rdev, ring, 256 * 1024);
7782
7783 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7784 ring->ring_obj = NULL;
7785 r600_ring_init(rdev, ring, 256 * 1024);
7786
87167bb1
CK
7787 r = radeon_uvd_init(rdev);
7788 if (!r) {
7789 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7790 ring->ring_obj = NULL;
7791 r600_ring_init(rdev, ring, 4096);
7792 }
7793
7bf94a2c
AD
7794 rdev->ih.ring_obj = NULL;
7795 r600_ih_ring_init(rdev, 64 * 1024);
7796
7797 r = r600_pcie_gart_init(rdev);
7798 if (r)
7799 return r;
7800
7801 rdev->accel_working = true;
7802 r = cik_startup(rdev);
7803 if (r) {
7804 dev_err(rdev->dev, "disabling GPU acceleration\n");
7805 cik_cp_fini(rdev);
7806 cik_sdma_fini(rdev);
7807 cik_irq_fini(rdev);
1fd11777 7808 sumo_rlc_fini(rdev);
963e81f9 7809 cik_mec_fini(rdev);
7bf94a2c
AD
7810 radeon_wb_fini(rdev);
7811 radeon_ib_pool_fini(rdev);
7812 radeon_vm_manager_fini(rdev);
7813 radeon_irq_kms_fini(rdev);
7814 cik_pcie_gart_fini(rdev);
7815 rdev->accel_working = false;
7816 }
7817
7818 /* Don't start up if the MC ucode is missing.
7819 * The default clocks and voltages before the MC ucode
7820 * is loaded are not suffient for advanced operations.
7821 */
7822 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7823 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7824 return -EINVAL;
7825 }
7826
7827 return 0;
7828}
7829
7830/**
7831 * cik_fini - asic specific driver and hw fini
7832 *
7833 * @rdev: radeon_device pointer
7834 *
7835 * Tear down the asic specific driver variables and program the hw
7836 * to an idle state (CIK).
7837 * Called at driver unload.
7838 */
7839void cik_fini(struct radeon_device *rdev)
7840{
7841 cik_cp_fini(rdev);
7842 cik_sdma_fini(rdev);
7843 cik_irq_fini(rdev);
1fd11777 7844 sumo_rlc_fini(rdev);
963e81f9 7845 cik_mec_fini(rdev);
7bf94a2c
AD
7846 radeon_wb_fini(rdev);
7847 radeon_vm_manager_fini(rdev);
7848 radeon_ib_pool_fini(rdev);
7849 radeon_irq_kms_fini(rdev);
2858c00d 7850 r600_uvd_stop(rdev);
87167bb1 7851 radeon_uvd_fini(rdev);
7bf94a2c
AD
7852 cik_pcie_gart_fini(rdev);
7853 r600_vram_scratch_fini(rdev);
7854 radeon_gem_fini(rdev);
7855 radeon_fence_driver_fini(rdev);
7856 radeon_bo_fini(rdev);
7857 radeon_atombios_fini(rdev);
7858 kfree(rdev->bios);
7859 rdev->bios = NULL;
7860}
cd84a27d
AD
7861
7862/* display watermark setup */
7863/**
7864 * dce8_line_buffer_adjust - Set up the line buffer
7865 *
7866 * @rdev: radeon_device pointer
7867 * @radeon_crtc: the selected display controller
7868 * @mode: the current display mode on the selected display
7869 * controller
7870 *
7871 * Setup up the line buffer allocation for
7872 * the selected display controller (CIK).
7873 * Returns the line buffer size in pixels.
7874 */
7875static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7876 struct radeon_crtc *radeon_crtc,
7877 struct drm_display_mode *mode)
7878{
7879 u32 tmp;
7880
7881 /*
7882 * Line Buffer Setup
7883 * There are 6 line buffers, one for each display controllers.
7884 * There are 3 partitions per LB. Select the number of partitions
7885 * to enable based on the display width. For display widths larger
7886 * than 4096, you need use to use 2 display controllers and combine
7887 * them using the stereo blender.
7888 */
7889 if (radeon_crtc->base.enabled && mode) {
7890 if (mode->crtc_hdisplay < 1920)
7891 tmp = 1;
7892 else if (mode->crtc_hdisplay < 2560)
7893 tmp = 2;
7894 else if (mode->crtc_hdisplay < 4096)
7895 tmp = 0;
7896 else {
7897 DRM_DEBUG_KMS("Mode too big for LB!\n");
7898 tmp = 0;
7899 }
7900 } else
7901 tmp = 1;
7902
7903 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7904 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7905
7906 if (radeon_crtc->base.enabled && mode) {
7907 switch (tmp) {
7908 case 0:
7909 default:
7910 return 4096 * 2;
7911 case 1:
7912 return 1920 * 2;
7913 case 2:
7914 return 2560 * 2;
7915 }
7916 }
7917
7918 /* controller not enabled, so no lb used */
7919 return 0;
7920}
7921
7922/**
7923 * cik_get_number_of_dram_channels - get the number of dram channels
7924 *
7925 * @rdev: radeon_device pointer
7926 *
7927 * Look up the number of video ram channels (CIK).
7928 * Used for display watermark bandwidth calculations
7929 * Returns the number of dram channels
7930 */
7931static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7932{
7933 u32 tmp = RREG32(MC_SHARED_CHMAP);
7934
7935 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7936 case 0:
7937 default:
7938 return 1;
7939 case 1:
7940 return 2;
7941 case 2:
7942 return 4;
7943 case 3:
7944 return 8;
7945 case 4:
7946 return 3;
7947 case 5:
7948 return 6;
7949 case 6:
7950 return 10;
7951 case 7:
7952 return 12;
7953 case 8:
7954 return 16;
7955 }
7956}
7957
7958struct dce8_wm_params {
7959 u32 dram_channels; /* number of dram channels */
7960 u32 yclk; /* bandwidth per dram data pin in kHz */
7961 u32 sclk; /* engine clock in kHz */
7962 u32 disp_clk; /* display clock in kHz */
7963 u32 src_width; /* viewport width */
7964 u32 active_time; /* active display time in ns */
7965 u32 blank_time; /* blank time in ns */
7966 bool interlaced; /* mode is interlaced */
7967 fixed20_12 vsc; /* vertical scale ratio */
7968 u32 num_heads; /* number of active crtcs */
7969 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7970 u32 lb_size; /* line buffer allocated to pipe */
7971 u32 vtaps; /* vertical scaler taps */
7972};
7973
7974/**
7975 * dce8_dram_bandwidth - get the dram bandwidth
7976 *
7977 * @wm: watermark calculation data
7978 *
7979 * Calculate the raw dram bandwidth (CIK).
7980 * Used for display watermark bandwidth calculations
7981 * Returns the dram bandwidth in MBytes/s
7982 */
7983static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7984{
7985 /* Calculate raw DRAM Bandwidth */
7986 fixed20_12 dram_efficiency; /* 0.7 */
7987 fixed20_12 yclk, dram_channels, bandwidth;
7988 fixed20_12 a;
7989
7990 a.full = dfixed_const(1000);
7991 yclk.full = dfixed_const(wm->yclk);
7992 yclk.full = dfixed_div(yclk, a);
7993 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7994 a.full = dfixed_const(10);
7995 dram_efficiency.full = dfixed_const(7);
7996 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7997 bandwidth.full = dfixed_mul(dram_channels, yclk);
7998 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7999
8000 return dfixed_trunc(bandwidth);
8001}
8002
8003/**
8004 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8005 *
8006 * @wm: watermark calculation data
8007 *
8008 * Calculate the dram bandwidth used for display (CIK).
8009 * Used for display watermark bandwidth calculations
8010 * Returns the dram bandwidth for display in MBytes/s
8011 */
8012static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8013{
8014 /* Calculate DRAM Bandwidth and the part allocated to display. */
8015 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8016 fixed20_12 yclk, dram_channels, bandwidth;
8017 fixed20_12 a;
8018
8019 a.full = dfixed_const(1000);
8020 yclk.full = dfixed_const(wm->yclk);
8021 yclk.full = dfixed_div(yclk, a);
8022 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8023 a.full = dfixed_const(10);
8024 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8025 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8026 bandwidth.full = dfixed_mul(dram_channels, yclk);
8027 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8028
8029 return dfixed_trunc(bandwidth);
8030}
8031
8032/**
8033 * dce8_data_return_bandwidth - get the data return bandwidth
8034 *
8035 * @wm: watermark calculation data
8036 *
8037 * Calculate the data return bandwidth used for display (CIK).
8038 * Used for display watermark bandwidth calculations
8039 * Returns the data return bandwidth in MBytes/s
8040 */
8041static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8042{
8043 /* Calculate the display Data return Bandwidth */
8044 fixed20_12 return_efficiency; /* 0.8 */
8045 fixed20_12 sclk, bandwidth;
8046 fixed20_12 a;
8047
8048 a.full = dfixed_const(1000);
8049 sclk.full = dfixed_const(wm->sclk);
8050 sclk.full = dfixed_div(sclk, a);
8051 a.full = dfixed_const(10);
8052 return_efficiency.full = dfixed_const(8);
8053 return_efficiency.full = dfixed_div(return_efficiency, a);
8054 a.full = dfixed_const(32);
8055 bandwidth.full = dfixed_mul(a, sclk);
8056 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8057
8058 return dfixed_trunc(bandwidth);
8059}
8060
8061/**
8062 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8063 *
8064 * @wm: watermark calculation data
8065 *
8066 * Calculate the dmif bandwidth used for display (CIK).
8067 * Used for display watermark bandwidth calculations
8068 * Returns the dmif bandwidth in MBytes/s
8069 */
8070static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8071{
8072 /* Calculate the DMIF Request Bandwidth */
8073 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8074 fixed20_12 disp_clk, bandwidth;
8075 fixed20_12 a, b;
8076
8077 a.full = dfixed_const(1000);
8078 disp_clk.full = dfixed_const(wm->disp_clk);
8079 disp_clk.full = dfixed_div(disp_clk, a);
8080 a.full = dfixed_const(32);
8081 b.full = dfixed_mul(a, disp_clk);
8082
8083 a.full = dfixed_const(10);
8084 disp_clk_request_efficiency.full = dfixed_const(8);
8085 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8086
8087 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8088
8089 return dfixed_trunc(bandwidth);
8090}
8091
8092/**
8093 * dce8_available_bandwidth - get the min available bandwidth
8094 *
8095 * @wm: watermark calculation data
8096 *
8097 * Calculate the min available bandwidth used for display (CIK).
8098 * Used for display watermark bandwidth calculations
8099 * Returns the min available bandwidth in MBytes/s
8100 */
8101static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8102{
8103 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8104 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8105 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8106 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8107
8108 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8109}
8110
8111/**
8112 * dce8_average_bandwidth - get the average available bandwidth
8113 *
8114 * @wm: watermark calculation data
8115 *
8116 * Calculate the average available bandwidth used for display (CIK).
8117 * Used for display watermark bandwidth calculations
8118 * Returns the average available bandwidth in MBytes/s
8119 */
8120static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8121{
8122 /* Calculate the display mode Average Bandwidth
8123 * DisplayMode should contain the source and destination dimensions,
8124 * timing, etc.
8125 */
8126 fixed20_12 bpp;
8127 fixed20_12 line_time;
8128 fixed20_12 src_width;
8129 fixed20_12 bandwidth;
8130 fixed20_12 a;
8131
8132 a.full = dfixed_const(1000);
8133 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8134 line_time.full = dfixed_div(line_time, a);
8135 bpp.full = dfixed_const(wm->bytes_per_pixel);
8136 src_width.full = dfixed_const(wm->src_width);
8137 bandwidth.full = dfixed_mul(src_width, bpp);
8138 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8139 bandwidth.full = dfixed_div(bandwidth, line_time);
8140
8141 return dfixed_trunc(bandwidth);
8142}
8143
8144/**
8145 * dce8_latency_watermark - get the latency watermark
8146 *
8147 * @wm: watermark calculation data
8148 *
8149 * Calculate the latency watermark (CIK).
8150 * Used for display watermark bandwidth calculations
8151 * Returns the latency watermark in ns
8152 */
8153static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8154{
8155 /* First calculate the latency in ns */
8156 u32 mc_latency = 2000; /* 2000 ns. */
8157 u32 available_bandwidth = dce8_available_bandwidth(wm);
8158 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8159 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8160 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8161 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8162 (wm->num_heads * cursor_line_pair_return_time);
8163 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8164 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8165 u32 tmp, dmif_size = 12288;
8166 fixed20_12 a, b, c;
8167
8168 if (wm->num_heads == 0)
8169 return 0;
8170
8171 a.full = dfixed_const(2);
8172 b.full = dfixed_const(1);
8173 if ((wm->vsc.full > a.full) ||
8174 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8175 (wm->vtaps >= 5) ||
8176 ((wm->vsc.full >= a.full) && wm->interlaced))
8177 max_src_lines_per_dst_line = 4;
8178 else
8179 max_src_lines_per_dst_line = 2;
8180
8181 a.full = dfixed_const(available_bandwidth);
8182 b.full = dfixed_const(wm->num_heads);
8183 a.full = dfixed_div(a, b);
8184
8185 b.full = dfixed_const(mc_latency + 512);
8186 c.full = dfixed_const(wm->disp_clk);
8187 b.full = dfixed_div(b, c);
8188
8189 c.full = dfixed_const(dmif_size);
8190 b.full = dfixed_div(c, b);
8191
8192 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8193
8194 b.full = dfixed_const(1000);
8195 c.full = dfixed_const(wm->disp_clk);
8196 b.full = dfixed_div(c, b);
8197 c.full = dfixed_const(wm->bytes_per_pixel);
8198 b.full = dfixed_mul(b, c);
8199
8200 lb_fill_bw = min(tmp, dfixed_trunc(b));
8201
8202 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8203 b.full = dfixed_const(1000);
8204 c.full = dfixed_const(lb_fill_bw);
8205 b.full = dfixed_div(c, b);
8206 a.full = dfixed_div(a, b);
8207 line_fill_time = dfixed_trunc(a);
8208
8209 if (line_fill_time < wm->active_time)
8210 return latency;
8211 else
8212 return latency + (line_fill_time - wm->active_time);
8213
8214}
8215
8216/**
8217 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8218 * average and available dram bandwidth
8219 *
8220 * @wm: watermark calculation data
8221 *
8222 * Check if the display average bandwidth fits in the display
8223 * dram bandwidth (CIK).
8224 * Used for display watermark bandwidth calculations
8225 * Returns true if the display fits, false if not.
8226 */
8227static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8228{
8229 if (dce8_average_bandwidth(wm) <=
8230 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8231 return true;
8232 else
8233 return false;
8234}
8235
8236/**
8237 * dce8_average_bandwidth_vs_available_bandwidth - check
8238 * average and available bandwidth
8239 *
8240 * @wm: watermark calculation data
8241 *
8242 * Check if the display average bandwidth fits in the display
8243 * available bandwidth (CIK).
8244 * Used for display watermark bandwidth calculations
8245 * Returns true if the display fits, false if not.
8246 */
8247static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8248{
8249 if (dce8_average_bandwidth(wm) <=
8250 (dce8_available_bandwidth(wm) / wm->num_heads))
8251 return true;
8252 else
8253 return false;
8254}
8255
8256/**
8257 * dce8_check_latency_hiding - check latency hiding
8258 *
8259 * @wm: watermark calculation data
8260 *
8261 * Check latency hiding (CIK).
8262 * Used for display watermark bandwidth calculations
8263 * Returns true if the display fits, false if not.
8264 */
8265static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8266{
8267 u32 lb_partitions = wm->lb_size / wm->src_width;
8268 u32 line_time = wm->active_time + wm->blank_time;
8269 u32 latency_tolerant_lines;
8270 u32 latency_hiding;
8271 fixed20_12 a;
8272
8273 a.full = dfixed_const(1);
8274 if (wm->vsc.full > a.full)
8275 latency_tolerant_lines = 1;
8276 else {
8277 if (lb_partitions <= (wm->vtaps + 1))
8278 latency_tolerant_lines = 1;
8279 else
8280 latency_tolerant_lines = 2;
8281 }
8282
8283 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8284
8285 if (dce8_latency_watermark(wm) <= latency_hiding)
8286 return true;
8287 else
8288 return false;
8289}
8290
8291/**
8292 * dce8_program_watermarks - program display watermarks
8293 *
8294 * @rdev: radeon_device pointer
8295 * @radeon_crtc: the selected display controller
8296 * @lb_size: line buffer size
8297 * @num_heads: number of display controllers in use
8298 *
8299 * Calculate and program the display watermarks for the
8300 * selected display controller (CIK).
8301 */
8302static void dce8_program_watermarks(struct radeon_device *rdev,
8303 struct radeon_crtc *radeon_crtc,
8304 u32 lb_size, u32 num_heads)
8305{
8306 struct drm_display_mode *mode = &radeon_crtc->base.mode;
58ea2dea 8307 struct dce8_wm_params wm_low, wm_high;
cd84a27d
AD
8308 u32 pixel_period;
8309 u32 line_time = 0;
8310 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8311 u32 tmp, wm_mask;
8312
8313 if (radeon_crtc->base.enabled && num_heads && mode) {
8314 pixel_period = 1000000 / (u32)mode->clock;
8315 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8316
58ea2dea
AD
8317 /* watermark for high clocks */
8318 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8319 rdev->pm.dpm_enabled) {
8320 wm_high.yclk =
8321 radeon_dpm_get_mclk(rdev, false) * 10;
8322 wm_high.sclk =
8323 radeon_dpm_get_sclk(rdev, false) * 10;
8324 } else {
8325 wm_high.yclk = rdev->pm.current_mclk * 10;
8326 wm_high.sclk = rdev->pm.current_sclk * 10;
8327 }
8328
8329 wm_high.disp_clk = mode->clock;
8330 wm_high.src_width = mode->crtc_hdisplay;
8331 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8332 wm_high.blank_time = line_time - wm_high.active_time;
8333 wm_high.interlaced = false;
cd84a27d 8334 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
58ea2dea
AD
8335 wm_high.interlaced = true;
8336 wm_high.vsc = radeon_crtc->vsc;
8337 wm_high.vtaps = 1;
cd84a27d 8338 if (radeon_crtc->rmx_type != RMX_OFF)
58ea2dea
AD
8339 wm_high.vtaps = 2;
8340 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8341 wm_high.lb_size = lb_size;
8342 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8343 wm_high.num_heads = num_heads;
cd84a27d
AD
8344
8345 /* set for high clocks */
58ea2dea
AD
8346 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8347
8348 /* possibly force display priority to high */
8349 /* should really do this at mode validation time... */
8350 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8351 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8352 !dce8_check_latency_hiding(&wm_high) ||
8353 (rdev->disp_priority == 2)) {
8354 DRM_DEBUG_KMS("force priority to high\n");
8355 }
8356
8357 /* watermark for low clocks */
8358 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8359 rdev->pm.dpm_enabled) {
8360 wm_low.yclk =
8361 radeon_dpm_get_mclk(rdev, true) * 10;
8362 wm_low.sclk =
8363 radeon_dpm_get_sclk(rdev, true) * 10;
8364 } else {
8365 wm_low.yclk = rdev->pm.current_mclk * 10;
8366 wm_low.sclk = rdev->pm.current_sclk * 10;
8367 }
8368
8369 wm_low.disp_clk = mode->clock;
8370 wm_low.src_width = mode->crtc_hdisplay;
8371 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8372 wm_low.blank_time = line_time - wm_low.active_time;
8373 wm_low.interlaced = false;
8374 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8375 wm_low.interlaced = true;
8376 wm_low.vsc = radeon_crtc->vsc;
8377 wm_low.vtaps = 1;
8378 if (radeon_crtc->rmx_type != RMX_OFF)
8379 wm_low.vtaps = 2;
8380 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8381 wm_low.lb_size = lb_size;
8382 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8383 wm_low.num_heads = num_heads;
8384
cd84a27d 8385 /* set for low clocks */
58ea2dea 8386 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
cd84a27d
AD
8387
8388 /* possibly force display priority to high */
8389 /* should really do this at mode validation time... */
58ea2dea
AD
8390 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8391 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8392 !dce8_check_latency_hiding(&wm_low) ||
cd84a27d
AD
8393 (rdev->disp_priority == 2)) {
8394 DRM_DEBUG_KMS("force priority to high\n");
8395 }
8396 }
8397
8398 /* select wm A */
8399 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8400 tmp = wm_mask;
8401 tmp &= ~LATENCY_WATERMARK_MASK(3);
8402 tmp |= LATENCY_WATERMARK_MASK(1);
8403 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8404 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8405 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8406 LATENCY_HIGH_WATERMARK(line_time)));
8407 /* select wm B */
8408 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8409 tmp &= ~LATENCY_WATERMARK_MASK(3);
8410 tmp |= LATENCY_WATERMARK_MASK(2);
8411 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8412 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8413 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8414 LATENCY_HIGH_WATERMARK(line_time)));
8415 /* restore original selection */
8416 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
58ea2dea
AD
8417
8418 /* save values for DPM */
8419 radeon_crtc->line_time = line_time;
8420 radeon_crtc->wm_high = latency_watermark_a;
8421 radeon_crtc->wm_low = latency_watermark_b;
cd84a27d
AD
8422}
8423
8424/**
8425 * dce8_bandwidth_update - program display watermarks
8426 *
8427 * @rdev: radeon_device pointer
8428 *
8429 * Calculate and program the display watermarks and line
8430 * buffer allocation (CIK).
8431 */
8432void dce8_bandwidth_update(struct radeon_device *rdev)
8433{
8434 struct drm_display_mode *mode = NULL;
8435 u32 num_heads = 0, lb_size;
8436 int i;
8437
8438 radeon_update_display_priority(rdev);
8439
8440 for (i = 0; i < rdev->num_crtc; i++) {
8441 if (rdev->mode_info.crtcs[i]->base.enabled)
8442 num_heads++;
8443 }
8444 for (i = 0; i < rdev->num_crtc; i++) {
8445 mode = &rdev->mode_info.crtcs[i]->base.mode;
8446 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8447 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8448 }
8449}
44fa346f
AD
8450
8451/**
8452 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8453 *
8454 * @rdev: radeon_device pointer
8455 *
8456 * Fetches a GPU clock counter snapshot (SI).
8457 * Returns the 64 bit clock counter snapshot.
8458 */
8459uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8460{
8461 uint64_t clock;
8462
8463 mutex_lock(&rdev->gpu_clock_mutex);
8464 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8465 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8466 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8467 mutex_unlock(&rdev->gpu_clock_mutex);
8468 return clock;
8469}
8470
87167bb1
CK
8471static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8472 u32 cntl_reg, u32 status_reg)
8473{
8474 int r, i;
8475 struct atom_clock_dividers dividers;
8476 uint32_t tmp;
8477
8478 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8479 clock, false, &dividers);
8480 if (r)
8481 return r;
8482
8483 tmp = RREG32_SMC(cntl_reg);
8484 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8485 tmp |= dividers.post_divider;
8486 WREG32_SMC(cntl_reg, tmp);
8487
8488 for (i = 0; i < 100; i++) {
8489 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8490 break;
8491 mdelay(10);
8492 }
8493 if (i == 100)
8494 return -ETIMEDOUT;
8495
8496 return 0;
8497}
8498
8499int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8500{
8501 int r = 0;
8502
8503 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8504 if (r)
8505 return r;
8506
8507 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8508 return r;
8509}
8510
8511int cik_uvd_resume(struct radeon_device *rdev)
8512{
8513 uint64_t addr;
8514 uint32_t size;
8515 int r;
8516
8517 r = radeon_uvd_resume(rdev);
8518 if (r)
8519 return r;
8520
8521 /* programm the VCPU memory controller bits 0-27 */
8522 addr = rdev->uvd.gpu_addr >> 3;
4ad9c1c7 8523 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
87167bb1
CK
8524 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
8525 WREG32(UVD_VCPU_CACHE_SIZE0, size);
8526
8527 addr += size;
8528 size = RADEON_UVD_STACK_SIZE >> 3;
8529 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
8530 WREG32(UVD_VCPU_CACHE_SIZE1, size);
8531
8532 addr += size;
8533 size = RADEON_UVD_HEAP_SIZE >> 3;
8534 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
8535 WREG32(UVD_VCPU_CACHE_SIZE2, size);
8536
8537 /* bits 28-31 */
8538 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
8539 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
8540
8541 /* bits 32-39 */
8542 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
8543 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
8544
8545 return 0;
8546}
8a7cd276
AD
8547
8548static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8549{
8550 struct pci_dev *root = rdev->pdev->bus->self;
8551 int bridge_pos, gpu_pos;
8552 u32 speed_cntl, mask, current_data_rate;
8553 int ret, i;
8554 u16 tmp16;
8555
8556 if (radeon_pcie_gen2 == 0)
8557 return;
8558
8559 if (rdev->flags & RADEON_IS_IGP)
8560 return;
8561
8562 if (!(rdev->flags & RADEON_IS_PCIE))
8563 return;
8564
8565 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8566 if (ret != 0)
8567 return;
8568
8569 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8570 return;
8571
8572 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8573 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8574 LC_CURRENT_DATA_RATE_SHIFT;
8575 if (mask & DRM_PCIE_SPEED_80) {
8576 if (current_data_rate == 2) {
8577 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8578 return;
8579 }
8580 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8581 } else if (mask & DRM_PCIE_SPEED_50) {
8582 if (current_data_rate == 1) {
8583 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8584 return;
8585 }
8586 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8587 }
8588
8589 bridge_pos = pci_pcie_cap(root);
8590 if (!bridge_pos)
8591 return;
8592
8593 gpu_pos = pci_pcie_cap(rdev->pdev);
8594 if (!gpu_pos)
8595 return;
8596
8597 if (mask & DRM_PCIE_SPEED_80) {
8598 /* re-try equalization if gen3 is not already enabled */
8599 if (current_data_rate != 2) {
8600 u16 bridge_cfg, gpu_cfg;
8601 u16 bridge_cfg2, gpu_cfg2;
8602 u32 max_lw, current_lw, tmp;
8603
8604 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8605 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8606
8607 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8608 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8609
8610 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8611 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8612
8613 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8614 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8615 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8616
8617 if (current_lw < max_lw) {
8618 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8619 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8620 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8621 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8622 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8623 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8624 }
8625 }
8626
8627 for (i = 0; i < 10; i++) {
8628 /* check status */
8629 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8630 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8631 break;
8632
8633 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8634 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8635
8636 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8637 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8638
8639 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8640 tmp |= LC_SET_QUIESCE;
8641 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8642
8643 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8644 tmp |= LC_REDO_EQ;
8645 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8646
8647 mdelay(100);
8648
8649 /* linkctl */
8650 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8651 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8652 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8653 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8654
8655 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8656 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8657 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8658 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8659
8660 /* linkctl2 */
8661 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8662 tmp16 &= ~((1 << 4) | (7 << 9));
8663 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8664 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8665
8666 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8667 tmp16 &= ~((1 << 4) | (7 << 9));
8668 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8669 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8670
8671 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8672 tmp &= ~LC_SET_QUIESCE;
8673 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8674 }
8675 }
8676 }
8677
8678 /* set the link speed */
8679 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8680 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8681 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8682
8683 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8684 tmp16 &= ~0xf;
8685 if (mask & DRM_PCIE_SPEED_80)
8686 tmp16 |= 3; /* gen3 */
8687 else if (mask & DRM_PCIE_SPEED_50)
8688 tmp16 |= 2; /* gen2 */
8689 else
8690 tmp16 |= 1; /* gen1 */
8691 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8692
8693 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8694 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8695 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8696
8697 for (i = 0; i < rdev->usec_timeout; i++) {
8698 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8699 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8700 break;
8701 udelay(1);
8702 }
8703}
7235711a
AD
8704
8705static void cik_program_aspm(struct radeon_device *rdev)
8706{
8707 u32 data, orig;
8708 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8709 bool disable_clkreq = false;
8710
8711 if (radeon_aspm == 0)
8712 return;
8713
8714 /* XXX double check IGPs */
8715 if (rdev->flags & RADEON_IS_IGP)
8716 return;
8717
8718 if (!(rdev->flags & RADEON_IS_PCIE))
8719 return;
8720
8721 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8722 data &= ~LC_XMIT_N_FTS_MASK;
8723 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8724 if (orig != data)
8725 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8726
8727 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8728 data |= LC_GO_TO_RECOVERY;
8729 if (orig != data)
8730 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8731
8732 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8733 data |= P_IGNORE_EDB_ERR;
8734 if (orig != data)
8735 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8736
8737 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8738 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8739 data |= LC_PMI_TO_L1_DIS;
8740 if (!disable_l0s)
8741 data |= LC_L0S_INACTIVITY(7);
8742
8743 if (!disable_l1) {
8744 data |= LC_L1_INACTIVITY(7);
8745 data &= ~LC_PMI_TO_L1_DIS;
8746 if (orig != data)
8747 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8748
8749 if (!disable_plloff_in_l1) {
8750 bool clk_req_support;
8751
8752 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8753 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8754 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8755 if (orig != data)
8756 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8757
8758 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8759 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8760 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8761 if (orig != data)
8762 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8763
8764 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8765 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8766 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8767 if (orig != data)
8768 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8769
8770 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8771 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8772 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8773 if (orig != data)
8774 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8775
8776 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8777 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8778 data |= LC_DYN_LANES_PWR_STATE(3);
8779 if (orig != data)
8780 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8781
8782 if (!disable_clkreq) {
8783 struct pci_dev *root = rdev->pdev->bus->self;
8784 u32 lnkcap;
8785
8786 clk_req_support = false;
8787 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8788 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8789 clk_req_support = true;
8790 } else {
8791 clk_req_support = false;
8792 }
8793
8794 if (clk_req_support) {
8795 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8796 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8797 if (orig != data)
8798 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8799
8800 orig = data = RREG32_SMC(THM_CLK_CNTL);
8801 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8802 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8803 if (orig != data)
8804 WREG32_SMC(THM_CLK_CNTL, data);
8805
8806 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8807 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8808 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8809 if (orig != data)
8810 WREG32_SMC(MISC_CLK_CTRL, data);
8811
8812 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8813 data &= ~BCLK_AS_XCLK;
8814 if (orig != data)
8815 WREG32_SMC(CG_CLKPIN_CNTL, data);
8816
8817 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8818 data &= ~FORCE_BIF_REFCLK_EN;
8819 if (orig != data)
8820 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8821
8822 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8823 data &= ~MPLL_CLKOUT_SEL_MASK;
8824 data |= MPLL_CLKOUT_SEL(4);
8825 if (orig != data)
8826 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8827 }
8828 }
8829 } else {
8830 if (orig != data)
8831 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8832 }
8833
8834 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8835 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8836 if (orig != data)
8837 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8838
8839 if (!disable_l0s) {
8840 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8841 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8842 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8843 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8844 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8845 data &= ~LC_L0S_INACTIVITY_MASK;
8846 if (orig != data)
8847 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8848 }
8849 }
8850 }
8851}