drm/radeon: add get_xclk() callback for CIK
[linux-2.6-block.git] / drivers / gpu / drm / radeon / cik.c
CommitLineData
8cc1a532
AD
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/module.h>
28#include "drmP.h"
29#include "radeon.h"
6f2043ce 30#include "radeon_asic.h"
8cc1a532
AD
31#include "cikd.h"
32#include "atom.h"
841cf442 33#include "cik_blit_shaders.h"
8cc1a532 34
02c81327
AD
/*
 * Microcode image sizes.  The *_UCODE_SIZE values are multiplied by 4
 * when the byte length of each firmware file is validated.
 */

/* GFX */
#define CIK_PFP_UCODE_SIZE		2144
#define CIK_ME_UCODE_SIZE		2144
#define CIK_CE_UCODE_SIZE		2144

/* compute */
#define CIK_MEC_UCODE_SIZE		4192

/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE		2048
#define KB_RLC_UCODE_SIZE		2560
#define KV_RLC_UCODE_SIZE		2560

/* gddr controller */
#define CIK_MC_UCODE_SIZE		7866

/* sdma */
#define CIK_SDMA_UCODE_SIZE		1050
#define CIK_SDMA_UCODE_VERSION		64
02c81327
AD
50
51MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
21a93e13 57MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
02c81327
AD
58MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
59MODULE_FIRMWARE("radeon/KAVERI_me.bin");
60MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
61MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
62MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
21a93e13 63MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
02c81327
AD
64MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
65MODULE_FIRMWARE("radeon/KABINI_me.bin");
66MODULE_FIRMWARE("radeon/KABINI_ce.bin");
67MODULE_FIRMWARE("radeon/KABINI_mec.bin");
68MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
21a93e13 69MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
02c81327 70
a59781bb
AD
71extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72extern void r600_ih_ring_fini(struct radeon_device *rdev);
6f2043ce
AD
73extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
74extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
cc066715 75extern bool evergreen_is_display_hung(struct radeon_device *rdev);
1c49165d 76extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
7bf94a2c
AD
77extern void si_rlc_fini(struct radeon_device *rdev);
78extern int si_rlc_init(struct radeon_device *rdev);
cc066715 79static void cik_rlc_stop(struct radeon_device *rdev);
6f2043ce 80
2c67912c
AD
81/**
82 * cik_get_xclk - get the xclk
83 *
84 * @rdev: radeon_device pointer
85 *
86 * Returns the reference clock used by the gfx engine
87 * (CIK).
88 */
89u32 cik_get_xclk(struct radeon_device *rdev)
90{
91 u32 reference_clock = rdev->clock.spll.reference_freq;
92
93 if (rdev->flags & RADEON_IS_IGP) {
94 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
95 return reference_clock / 2;
96 } else {
97 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
98 return reference_clock / 4;
99 }
100 return reference_clock;
101}
102
bc8273fe
AD
103#define BONAIRE_IO_MC_REGS_SIZE 36
104
105static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
106{
107 {0x00000070, 0x04400000},
108 {0x00000071, 0x80c01803},
109 {0x00000072, 0x00004004},
110 {0x00000073, 0x00000100},
111 {0x00000074, 0x00ff0000},
112 {0x00000075, 0x34000000},
113 {0x00000076, 0x08000014},
114 {0x00000077, 0x00cc08ec},
115 {0x00000078, 0x00000400},
116 {0x00000079, 0x00000000},
117 {0x0000007a, 0x04090000},
118 {0x0000007c, 0x00000000},
119 {0x0000007e, 0x4408a8e8},
120 {0x0000007f, 0x00000304},
121 {0x00000080, 0x00000000},
122 {0x00000082, 0x00000001},
123 {0x00000083, 0x00000002},
124 {0x00000084, 0xf3e4f400},
125 {0x00000085, 0x052024e3},
126 {0x00000087, 0x00000000},
127 {0x00000088, 0x01000000},
128 {0x0000008a, 0x1c0a0000},
129 {0x0000008b, 0xff010000},
130 {0x0000008d, 0xffffefff},
131 {0x0000008e, 0xfff3efff},
132 {0x0000008f, 0xfff3efbf},
133 {0x00000092, 0xf7ffffff},
134 {0x00000093, 0xffffff7f},
135 {0x00000095, 0x00101101},
136 {0x00000096, 0x00000fff},
137 {0x00000097, 0x00116fff},
138 {0x00000098, 0x60010000},
139 {0x00000099, 0x10010000},
140 {0x0000009a, 0x00006000},
141 {0x0000009b, 0x00001000},
142 {0x0000009f, 0x00b48000}
143};
144
145/* ucode loading */
146/**
147 * ci_mc_load_microcode - load MC ucode into the hw
148 *
149 * @rdev: radeon_device pointer
150 *
151 * Load the GDDR MC ucode into the hw (CIK).
152 * Returns 0 on success, error on failure.
153 */
154static int ci_mc_load_microcode(struct radeon_device *rdev)
155{
156 const __be32 *fw_data;
157 u32 running, blackout = 0;
158 u32 *io_mc_regs;
159 int i, ucode_size, regs_size;
160
161 if (!rdev->mc_fw)
162 return -EINVAL;
163
164 switch (rdev->family) {
165 case CHIP_BONAIRE:
166 default:
167 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
168 ucode_size = CIK_MC_UCODE_SIZE;
169 regs_size = BONAIRE_IO_MC_REGS_SIZE;
170 break;
171 }
172
173 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
174
175 if (running == 0) {
176 if (running) {
177 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
178 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
179 }
180
181 /* reset the engine and set to writable */
182 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
183 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
184
185 /* load mc io regs */
186 for (i = 0; i < regs_size; i++) {
187 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
188 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
189 }
190 /* load the MC ucode */
191 fw_data = (const __be32 *)rdev->mc_fw->data;
192 for (i = 0; i < ucode_size; i++)
193 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
194
195 /* put the engine back into the active state */
196 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
197 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
198 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
199
200 /* wait for training to complete */
201 for (i = 0; i < rdev->usec_timeout; i++) {
202 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
203 break;
204 udelay(1);
205 }
206 for (i = 0; i < rdev->usec_timeout; i++) {
207 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
208 break;
209 udelay(1);
210 }
211
212 if (running)
213 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
214 }
215
216 return 0;
217}
218
02c81327
AD
219/**
220 * cik_init_microcode - load ucode images from disk
221 *
222 * @rdev: radeon_device pointer
223 *
224 * Use the firmware interface to load the ucode images into
225 * the driver (not loaded into hw).
226 * Returns 0 on success, error on failure.
227 */
228static int cik_init_microcode(struct radeon_device *rdev)
229{
230 struct platform_device *pdev;
231 const char *chip_name;
232 size_t pfp_req_size, me_req_size, ce_req_size,
21a93e13
AD
233 mec_req_size, rlc_req_size, mc_req_size,
234 sdma_req_size;
02c81327
AD
235 char fw_name[30];
236 int err;
237
238 DRM_DEBUG("\n");
239
240 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
241 err = IS_ERR(pdev);
242 if (err) {
243 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
244 return -EINVAL;
245 }
246
247 switch (rdev->family) {
248 case CHIP_BONAIRE:
249 chip_name = "BONAIRE";
250 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
251 me_req_size = CIK_ME_UCODE_SIZE * 4;
252 ce_req_size = CIK_CE_UCODE_SIZE * 4;
253 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
254 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
255 mc_req_size = CIK_MC_UCODE_SIZE * 4;
21a93e13 256 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
02c81327
AD
257 break;
258 case CHIP_KAVERI:
259 chip_name = "KAVERI";
260 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
261 me_req_size = CIK_ME_UCODE_SIZE * 4;
262 ce_req_size = CIK_CE_UCODE_SIZE * 4;
263 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
264 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
21a93e13 265 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
02c81327
AD
266 break;
267 case CHIP_KABINI:
268 chip_name = "KABINI";
269 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
270 me_req_size = CIK_ME_UCODE_SIZE * 4;
271 ce_req_size = CIK_CE_UCODE_SIZE * 4;
272 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
273 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
21a93e13 274 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
02c81327
AD
275 break;
276 default: BUG();
277 }
278
279 DRM_INFO("Loading %s Microcode\n", chip_name);
280
281 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
282 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
283 if (err)
284 goto out;
285 if (rdev->pfp_fw->size != pfp_req_size) {
286 printk(KERN_ERR
287 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
288 rdev->pfp_fw->size, fw_name);
289 err = -EINVAL;
290 goto out;
291 }
292
293 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
294 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
295 if (err)
296 goto out;
297 if (rdev->me_fw->size != me_req_size) {
298 printk(KERN_ERR
299 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
300 rdev->me_fw->size, fw_name);
301 err = -EINVAL;
302 }
303
304 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
305 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
306 if (err)
307 goto out;
308 if (rdev->ce_fw->size != ce_req_size) {
309 printk(KERN_ERR
310 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
311 rdev->ce_fw->size, fw_name);
312 err = -EINVAL;
313 }
314
315 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
316 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
317 if (err)
318 goto out;
319 if (rdev->mec_fw->size != mec_req_size) {
320 printk(KERN_ERR
321 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
322 rdev->mec_fw->size, fw_name);
323 err = -EINVAL;
324 }
325
326 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
327 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
328 if (err)
329 goto out;
330 if (rdev->rlc_fw->size != rlc_req_size) {
331 printk(KERN_ERR
332 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
333 rdev->rlc_fw->size, fw_name);
334 err = -EINVAL;
335 }
336
21a93e13
AD
337 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
338 err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
339 if (err)
340 goto out;
341 if (rdev->sdma_fw->size != sdma_req_size) {
342 printk(KERN_ERR
343 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
344 rdev->sdma_fw->size, fw_name);
345 err = -EINVAL;
346 }
347
02c81327
AD
348 /* No MC ucode on APUs */
349 if (!(rdev->flags & RADEON_IS_IGP)) {
350 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
351 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
352 if (err)
353 goto out;
354 if (rdev->mc_fw->size != mc_req_size) {
355 printk(KERN_ERR
356 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
357 rdev->mc_fw->size, fw_name);
358 err = -EINVAL;
359 }
360 }
361
362out:
363 platform_device_unregister(pdev);
364
365 if (err) {
366 if (err != -EINVAL)
367 printk(KERN_ERR
368 "cik_cp: Failed to load firmware \"%s\"\n",
369 fw_name);
370 release_firmware(rdev->pfp_fw);
371 rdev->pfp_fw = NULL;
372 release_firmware(rdev->me_fw);
373 rdev->me_fw = NULL;
374 release_firmware(rdev->ce_fw);
375 rdev->ce_fw = NULL;
376 release_firmware(rdev->rlc_fw);
377 rdev->rlc_fw = NULL;
378 release_firmware(rdev->mc_fw);
379 rdev->mc_fw = NULL;
380 }
381 return err;
382}
383
8cc1a532
AD
384/*
385 * Core functions
386 */
387/**
388 * cik_tiling_mode_table_init - init the hw tiling table
389 *
390 * @rdev: radeon_device pointer
391 *
392 * Starting with SI, the tiling setup is done globally in a
393 * set of 32 tiling modes. Rather than selecting each set of
394 * parameters per surface as on older asics, we just select
395 * which index in the tiling table we want to use, and the
396 * surface uses those parameters (CIK).
397 */
398static void cik_tiling_mode_table_init(struct radeon_device *rdev)
399{
400 const u32 num_tile_mode_states = 32;
401 const u32 num_secondary_tile_mode_states = 16;
402 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
403 u32 num_pipe_configs;
404 u32 num_rbs = rdev->config.cik.max_backends_per_se *
405 rdev->config.cik.max_shader_engines;
406
407 switch (rdev->config.cik.mem_row_size_in_kb) {
408 case 1:
409 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
410 break;
411 case 2:
412 default:
413 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
414 break;
415 case 4:
416 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
417 break;
418 }
419
420 num_pipe_configs = rdev->config.cik.max_tile_pipes;
421 if (num_pipe_configs > 8)
422 num_pipe_configs = 8; /* ??? */
423
424 if (num_pipe_configs == 8) {
425 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
426 switch (reg_offset) {
427 case 0:
428 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
429 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
430 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
431 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
432 break;
433 case 1:
434 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
435 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
436 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
437 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
438 break;
439 case 2:
440 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
442 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
443 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
444 break;
445 case 3:
446 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
447 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
448 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
449 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
450 break;
451 case 4:
452 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
454 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
455 TILE_SPLIT(split_equal_to_row_size));
456 break;
457 case 5:
458 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
459 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
460 break;
461 case 6:
462 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
463 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
464 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
465 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
466 break;
467 case 7:
468 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
469 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
470 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
471 TILE_SPLIT(split_equal_to_row_size));
472 break;
473 case 8:
474 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
475 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
476 break;
477 case 9:
478 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
479 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
480 break;
481 case 10:
482 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
483 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
484 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
486 break;
487 case 11:
488 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
489 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
490 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
492 break;
493 case 12:
494 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
495 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
496 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
498 break;
499 case 13:
500 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
501 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
502 break;
503 case 14:
504 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
505 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
506 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
508 break;
509 case 16:
510 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
511 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
512 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
513 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
514 break;
515 case 17:
516 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
517 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
520 break;
521 case 27:
522 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
523 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
524 break;
525 case 28:
526 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
527 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
528 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
529 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
530 break;
531 case 29:
532 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
533 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
534 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
536 break;
537 case 30:
538 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
539 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
540 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
541 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
542 break;
543 default:
544 gb_tile_moden = 0;
545 break;
546 }
547 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
548 }
549 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
550 switch (reg_offset) {
551 case 0:
552 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
555 NUM_BANKS(ADDR_SURF_16_BANK));
556 break;
557 case 1:
558 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
559 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
560 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
561 NUM_BANKS(ADDR_SURF_16_BANK));
562 break;
563 case 2:
564 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
565 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
566 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
567 NUM_BANKS(ADDR_SURF_16_BANK));
568 break;
569 case 3:
570 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
571 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
572 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
573 NUM_BANKS(ADDR_SURF_16_BANK));
574 break;
575 case 4:
576 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
577 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
578 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
579 NUM_BANKS(ADDR_SURF_8_BANK));
580 break;
581 case 5:
582 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
583 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
584 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
585 NUM_BANKS(ADDR_SURF_4_BANK));
586 break;
587 case 6:
588 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
589 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
590 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
591 NUM_BANKS(ADDR_SURF_2_BANK));
592 break;
593 case 8:
594 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
595 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
596 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
597 NUM_BANKS(ADDR_SURF_16_BANK));
598 break;
599 case 9:
600 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
601 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
602 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
603 NUM_BANKS(ADDR_SURF_16_BANK));
604 break;
605 case 10:
606 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
607 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
608 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
609 NUM_BANKS(ADDR_SURF_16_BANK));
610 break;
611 case 11:
612 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
613 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
614 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
615 NUM_BANKS(ADDR_SURF_16_BANK));
616 break;
617 case 12:
618 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
619 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
620 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
621 NUM_BANKS(ADDR_SURF_8_BANK));
622 break;
623 case 13:
624 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
625 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
626 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
627 NUM_BANKS(ADDR_SURF_4_BANK));
628 break;
629 case 14:
630 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
631 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
632 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
633 NUM_BANKS(ADDR_SURF_2_BANK));
634 break;
635 default:
636 gb_tile_moden = 0;
637 break;
638 }
639 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
640 }
641 } else if (num_pipe_configs == 4) {
642 if (num_rbs == 4) {
643 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
644 switch (reg_offset) {
645 case 0:
646 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
647 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
648 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
649 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
650 break;
651 case 1:
652 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
653 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
654 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
655 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
656 break;
657 case 2:
658 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
659 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
660 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
661 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
662 break;
663 case 3:
664 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
665 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
666 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
667 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
668 break;
669 case 4:
670 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
671 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
672 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
673 TILE_SPLIT(split_equal_to_row_size));
674 break;
675 case 5:
676 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
677 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
678 break;
679 case 6:
680 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
681 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
682 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
683 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
684 break;
685 case 7:
686 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
687 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
688 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
689 TILE_SPLIT(split_equal_to_row_size));
690 break;
691 case 8:
692 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
693 PIPE_CONFIG(ADDR_SURF_P4_16x16));
694 break;
695 case 9:
696 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
697 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
698 break;
699 case 10:
700 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
701 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
702 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
704 break;
705 case 11:
706 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
707 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
708 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
710 break;
711 case 12:
712 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
713 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
714 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
715 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
716 break;
717 case 13:
718 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
719 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
720 break;
721 case 14:
722 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
723 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
724 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
726 break;
727 case 16:
728 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
729 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
730 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
731 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
732 break;
733 case 17:
734 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
735 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
736 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
738 break;
739 case 27:
740 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
741 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
742 break;
743 case 28:
744 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
745 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
746 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
748 break;
749 case 29:
750 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
751 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
752 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
754 break;
755 case 30:
756 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
757 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
758 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
760 break;
761 default:
762 gb_tile_moden = 0;
763 break;
764 }
765 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
766 }
767 } else if (num_rbs < 4) {
768 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
769 switch (reg_offset) {
770 case 0:
771 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
772 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
773 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
774 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
775 break;
776 case 1:
777 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
778 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
779 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
780 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
781 break;
782 case 2:
783 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
784 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
785 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
786 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
787 break;
788 case 3:
789 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
790 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
791 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
792 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
793 break;
794 case 4:
795 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
796 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
797 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
798 TILE_SPLIT(split_equal_to_row_size));
799 break;
800 case 5:
801 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
802 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
803 break;
804 case 6:
805 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
806 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
807 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
808 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
809 break;
810 case 7:
811 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
812 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
813 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
814 TILE_SPLIT(split_equal_to_row_size));
815 break;
816 case 8:
817 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
818 PIPE_CONFIG(ADDR_SURF_P4_8x16));
819 break;
820 case 9:
821 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
822 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
823 break;
824 case 10:
825 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
826 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
827 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
828 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
829 break;
830 case 11:
831 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
832 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
833 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
835 break;
836 case 12:
837 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
838 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
839 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
841 break;
842 case 13:
843 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
844 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
845 break;
846 case 14:
847 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
848 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
849 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
851 break;
852 case 16:
853 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
854 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
855 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
856 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
857 break;
858 case 17:
859 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
860 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
861 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
863 break;
864 case 27:
865 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
866 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
867 break;
868 case 28:
869 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
870 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
871 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
872 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
873 break;
874 case 29:
875 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
876 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
877 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
879 break;
880 case 30:
881 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
882 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
883 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
885 break;
886 default:
887 gb_tile_moden = 0;
888 break;
889 }
890 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
891 }
892 }
893 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
894 switch (reg_offset) {
895 case 0:
896 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
897 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
898 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
899 NUM_BANKS(ADDR_SURF_16_BANK));
900 break;
901 case 1:
902 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
903 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
904 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
905 NUM_BANKS(ADDR_SURF_16_BANK));
906 break;
907 case 2:
908 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
909 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
910 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
911 NUM_BANKS(ADDR_SURF_16_BANK));
912 break;
913 case 3:
914 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
915 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
916 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
917 NUM_BANKS(ADDR_SURF_16_BANK));
918 break;
919 case 4:
920 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
921 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
922 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
923 NUM_BANKS(ADDR_SURF_16_BANK));
924 break;
925 case 5:
926 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
929 NUM_BANKS(ADDR_SURF_8_BANK));
930 break;
931 case 6:
932 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
933 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
934 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
935 NUM_BANKS(ADDR_SURF_4_BANK));
936 break;
937 case 8:
938 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
941 NUM_BANKS(ADDR_SURF_16_BANK));
942 break;
943 case 9:
944 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
945 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
947 NUM_BANKS(ADDR_SURF_16_BANK));
948 break;
949 case 10:
950 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
951 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
952 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
953 NUM_BANKS(ADDR_SURF_16_BANK));
954 break;
955 case 11:
956 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
959 NUM_BANKS(ADDR_SURF_16_BANK));
960 break;
961 case 12:
962 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
965 NUM_BANKS(ADDR_SURF_16_BANK));
966 break;
967 case 13:
968 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
971 NUM_BANKS(ADDR_SURF_8_BANK));
972 break;
973 case 14:
974 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
977 NUM_BANKS(ADDR_SURF_4_BANK));
978 break;
979 default:
980 gb_tile_moden = 0;
981 break;
982 }
983 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
984 }
985 } else if (num_pipe_configs == 2) {
986 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
987 switch (reg_offset) {
988 case 0:
989 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
990 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
991 PIPE_CONFIG(ADDR_SURF_P2) |
992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
993 break;
994 case 1:
995 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
996 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
997 PIPE_CONFIG(ADDR_SURF_P2) |
998 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
999 break;
1000 case 2:
1001 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1003 PIPE_CONFIG(ADDR_SURF_P2) |
1004 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1005 break;
1006 case 3:
1007 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1008 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1009 PIPE_CONFIG(ADDR_SURF_P2) |
1010 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1011 break;
1012 case 4:
1013 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1015 PIPE_CONFIG(ADDR_SURF_P2) |
1016 TILE_SPLIT(split_equal_to_row_size));
1017 break;
1018 case 5:
1019 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1021 break;
1022 case 6:
1023 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1024 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1025 PIPE_CONFIG(ADDR_SURF_P2) |
1026 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1027 break;
1028 case 7:
1029 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1030 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1031 PIPE_CONFIG(ADDR_SURF_P2) |
1032 TILE_SPLIT(split_equal_to_row_size));
1033 break;
1034 case 8:
1035 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1036 break;
1037 case 9:
1038 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1039 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1040 break;
1041 case 10:
1042 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1043 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1044 PIPE_CONFIG(ADDR_SURF_P2) |
1045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1046 break;
1047 case 11:
1048 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1049 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1050 PIPE_CONFIG(ADDR_SURF_P2) |
1051 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1052 break;
1053 case 12:
1054 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1055 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1056 PIPE_CONFIG(ADDR_SURF_P2) |
1057 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1058 break;
1059 case 13:
1060 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1061 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1062 break;
1063 case 14:
1064 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1065 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1066 PIPE_CONFIG(ADDR_SURF_P2) |
1067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1068 break;
1069 case 16:
1070 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1071 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1072 PIPE_CONFIG(ADDR_SURF_P2) |
1073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1074 break;
1075 case 17:
1076 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1077 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1078 PIPE_CONFIG(ADDR_SURF_P2) |
1079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1080 break;
1081 case 27:
1082 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1083 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1084 break;
1085 case 28:
1086 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1087 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1088 PIPE_CONFIG(ADDR_SURF_P2) |
1089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1090 break;
1091 case 29:
1092 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1093 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1094 PIPE_CONFIG(ADDR_SURF_P2) |
1095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1096 break;
1097 case 30:
1098 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1099 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1100 PIPE_CONFIG(ADDR_SURF_P2) |
1101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1102 break;
1103 default:
1104 gb_tile_moden = 0;
1105 break;
1106 }
1107 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1108 }
1109 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1110 switch (reg_offset) {
1111 case 0:
1112 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1115 NUM_BANKS(ADDR_SURF_16_BANK));
1116 break;
1117 case 1:
1118 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1121 NUM_BANKS(ADDR_SURF_16_BANK));
1122 break;
1123 case 2:
1124 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1125 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1126 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1127 NUM_BANKS(ADDR_SURF_16_BANK));
1128 break;
1129 case 3:
1130 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1133 NUM_BANKS(ADDR_SURF_16_BANK));
1134 break;
1135 case 4:
1136 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1138 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1139 NUM_BANKS(ADDR_SURF_16_BANK));
1140 break;
1141 case 5:
1142 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1145 NUM_BANKS(ADDR_SURF_16_BANK));
1146 break;
1147 case 6:
1148 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1151 NUM_BANKS(ADDR_SURF_8_BANK));
1152 break;
1153 case 8:
1154 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1155 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1156 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1157 NUM_BANKS(ADDR_SURF_16_BANK));
1158 break;
1159 case 9:
1160 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1163 NUM_BANKS(ADDR_SURF_16_BANK));
1164 break;
1165 case 10:
1166 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1169 NUM_BANKS(ADDR_SURF_16_BANK));
1170 break;
1171 case 11:
1172 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1175 NUM_BANKS(ADDR_SURF_16_BANK));
1176 break;
1177 case 12:
1178 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1181 NUM_BANKS(ADDR_SURF_16_BANK));
1182 break;
1183 case 13:
1184 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1187 NUM_BANKS(ADDR_SURF_16_BANK));
1188 break;
1189 case 14:
1190 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1193 NUM_BANKS(ADDR_SURF_8_BANK));
1194 break;
1195 default:
1196 gb_tile_moden = 0;
1197 break;
1198 }
1199 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1200 }
1201 } else
1202 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1203}
1204
1205/**
1206 * cik_select_se_sh - select which SE, SH to address
1207 *
1208 * @rdev: radeon_device pointer
1209 * @se_num: shader engine to address
1210 * @sh_num: sh block to address
1211 *
1212 * Select which SE, SH combinations to address. Certain
1213 * registers are instanced per SE or SH. 0xffffffff means
1214 * broadcast to all SEs or SHs (CIK).
1215 */
1216static void cik_select_se_sh(struct radeon_device *rdev,
1217 u32 se_num, u32 sh_num)
1218{
1219 u32 data = INSTANCE_BROADCAST_WRITES;
1220
1221 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1222 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1223 else if (se_num == 0xffffffff)
1224 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1225 else if (sh_num == 0xffffffff)
1226 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1227 else
1228 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1229 WREG32(GRBM_GFX_INDEX, data);
1230}
1231
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Build a mask with the low @bit_width bits set (CIK).  The mask is
 * grown one bit at a time, so widths of 32 or more simply yield an
 * all-ones value.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 mask = 0;
	u32 remaining;

	for (remaining = bit_width; remaining != 0; remaining--)
		mask = (mask << 1) | 1;

	return mask;
}
1250
1251/**
1252 * cik_select_se_sh - select which SE, SH to address
1253 *
1254 * @rdev: radeon_device pointer
1255 * @max_rb_num: max RBs (render backends) for the asic
1256 * @se_num: number of SEs (shader engines) for the asic
1257 * @sh_per_se: number of SH blocks per SE for the asic
1258 *
1259 * Calculates the bitmask of disabled RBs (CIK).
1260 * Returns the disabled RB bitmask.
1261 */
1262static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1263 u32 max_rb_num, u32 se_num,
1264 u32 sh_per_se)
1265{
1266 u32 data, mask;
1267
1268 data = RREG32(CC_RB_BACKEND_DISABLE);
1269 if (data & 1)
1270 data &= BACKEND_DISABLE_MASK;
1271 else
1272 data = 0;
1273 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1274
1275 data >>= BACKEND_DISABLE_SHIFT;
1276
1277 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1278
1279 return data & mask;
1280}
1281
1282/**
1283 * cik_setup_rb - setup the RBs on the asic
1284 *
1285 * @rdev: radeon_device pointer
1286 * @se_num: number of SEs (shader engines) for the asic
1287 * @sh_per_se: number of SH blocks per SE for the asic
1288 * @max_rb_num: max RBs (render backends) for the asic
1289 *
1290 * Configures per-SE/SH RB registers (CIK).
1291 */
1292static void cik_setup_rb(struct radeon_device *rdev,
1293 u32 se_num, u32 sh_per_se,
1294 u32 max_rb_num)
1295{
1296 int i, j;
1297 u32 data, mask;
1298 u32 disabled_rbs = 0;
1299 u32 enabled_rbs = 0;
1300
1301 for (i = 0; i < se_num; i++) {
1302 for (j = 0; j < sh_per_se; j++) {
1303 cik_select_se_sh(rdev, i, j);
1304 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1305 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1306 }
1307 }
1308 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1309
1310 mask = 1;
1311 for (i = 0; i < max_rb_num; i++) {
1312 if (!(disabled_rbs & mask))
1313 enabled_rbs |= mask;
1314 mask <<= 1;
1315 }
1316
1317 for (i = 0; i < se_num; i++) {
1318 cik_select_se_sh(rdev, i, 0xffffffff);
1319 data = 0;
1320 for (j = 0; j < sh_per_se; j++) {
1321 switch (enabled_rbs & 3) {
1322 case 1:
1323 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1324 break;
1325 case 2:
1326 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1327 break;
1328 case 3:
1329 default:
1330 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1331 break;
1332 }
1333 enabled_rbs >>= 2;
1334 }
1335 WREG32(PA_SC_RASTER_CONFIG, data);
1336 }
1337 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1338}
1339
1340/**
1341 * cik_gpu_init - setup the 3D engine
1342 *
1343 * @rdev: radeon_device pointer
1344 *
1345 * Configures the 3D engine and tiling configuration
1346 * registers so that the 3D engine is usable.
1347 */
1348static void cik_gpu_init(struct radeon_device *rdev)
1349{
1350 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1351 u32 mc_shared_chmap, mc_arb_ramcfg;
1352 u32 hdp_host_path_cntl;
1353 u32 tmp;
1354 int i, j;
1355
1356 switch (rdev->family) {
1357 case CHIP_BONAIRE:
1358 rdev->config.cik.max_shader_engines = 2;
1359 rdev->config.cik.max_tile_pipes = 4;
1360 rdev->config.cik.max_cu_per_sh = 7;
1361 rdev->config.cik.max_sh_per_se = 1;
1362 rdev->config.cik.max_backends_per_se = 2;
1363 rdev->config.cik.max_texture_channel_caches = 4;
1364 rdev->config.cik.max_gprs = 256;
1365 rdev->config.cik.max_gs_threads = 32;
1366 rdev->config.cik.max_hw_contexts = 8;
1367
1368 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1369 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1370 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1371 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1372 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1373 break;
1374 case CHIP_KAVERI:
1375 /* TODO */
1376 break;
1377 case CHIP_KABINI:
1378 default:
1379 rdev->config.cik.max_shader_engines = 1;
1380 rdev->config.cik.max_tile_pipes = 2;
1381 rdev->config.cik.max_cu_per_sh = 2;
1382 rdev->config.cik.max_sh_per_se = 1;
1383 rdev->config.cik.max_backends_per_se = 1;
1384 rdev->config.cik.max_texture_channel_caches = 2;
1385 rdev->config.cik.max_gprs = 256;
1386 rdev->config.cik.max_gs_threads = 16;
1387 rdev->config.cik.max_hw_contexts = 8;
1388
1389 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1390 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1391 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1392 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1393 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1394 break;
1395 }
1396
1397 /* Initialize HDP */
1398 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1399 WREG32((0x2c14 + j), 0x00000000);
1400 WREG32((0x2c18 + j), 0x00000000);
1401 WREG32((0x2c1c + j), 0x00000000);
1402 WREG32((0x2c20 + j), 0x00000000);
1403 WREG32((0x2c24 + j), 0x00000000);
1404 }
1405
1406 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1407
1408 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1409
1410 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1411 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1412
1413 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1414 rdev->config.cik.mem_max_burst_length_bytes = 256;
1415 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1416 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1417 if (rdev->config.cik.mem_row_size_in_kb > 4)
1418 rdev->config.cik.mem_row_size_in_kb = 4;
1419 /* XXX use MC settings? */
1420 rdev->config.cik.shader_engine_tile_size = 32;
1421 rdev->config.cik.num_gpus = 1;
1422 rdev->config.cik.multi_gpu_tile_size = 64;
1423
1424 /* fix up row size */
1425 gb_addr_config &= ~ROW_SIZE_MASK;
1426 switch (rdev->config.cik.mem_row_size_in_kb) {
1427 case 1:
1428 default:
1429 gb_addr_config |= ROW_SIZE(0);
1430 break;
1431 case 2:
1432 gb_addr_config |= ROW_SIZE(1);
1433 break;
1434 case 4:
1435 gb_addr_config |= ROW_SIZE(2);
1436 break;
1437 }
1438
1439 /* setup tiling info dword. gb_addr_config is not adequate since it does
1440 * not have bank info, so create a custom tiling dword.
1441 * bits 3:0 num_pipes
1442 * bits 7:4 num_banks
1443 * bits 11:8 group_size
1444 * bits 15:12 row_size
1445 */
1446 rdev->config.cik.tile_config = 0;
1447 switch (rdev->config.cik.num_tile_pipes) {
1448 case 1:
1449 rdev->config.cik.tile_config |= (0 << 0);
1450 break;
1451 case 2:
1452 rdev->config.cik.tile_config |= (1 << 0);
1453 break;
1454 case 4:
1455 rdev->config.cik.tile_config |= (2 << 0);
1456 break;
1457 case 8:
1458 default:
1459 /* XXX what about 12? */
1460 rdev->config.cik.tile_config |= (3 << 0);
1461 break;
1462 }
1463 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1464 rdev->config.cik.tile_config |= 1 << 4;
1465 else
1466 rdev->config.cik.tile_config |= 0 << 4;
1467 rdev->config.cik.tile_config |=
1468 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1469 rdev->config.cik.tile_config |=
1470 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1471
1472 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1473 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1474 WREG32(DMIF_ADDR_CALC, gb_addr_config);
21a93e13
AD
1475 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1476 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
8cc1a532
AD
1477
1478 cik_tiling_mode_table_init(rdev);
1479
1480 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1481 rdev->config.cik.max_sh_per_se,
1482 rdev->config.cik.max_backends_per_se);
1483
1484 /* set HW defaults for 3D engine */
1485 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1486
1487 WREG32(SX_DEBUG_1, 0x20);
1488
1489 WREG32(TA_CNTL_AUX, 0x00010000);
1490
1491 tmp = RREG32(SPI_CONFIG_CNTL);
1492 tmp |= 0x03000000;
1493 WREG32(SPI_CONFIG_CNTL, tmp);
1494
1495 WREG32(SQ_CONFIG, 1);
1496
1497 WREG32(DB_DEBUG, 0);
1498
1499 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1500 tmp |= 0x00000400;
1501 WREG32(DB_DEBUG2, tmp);
1502
1503 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1504 tmp |= 0x00020200;
1505 WREG32(DB_DEBUG3, tmp);
1506
1507 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1508 tmp |= 0x00018208;
1509 WREG32(CB_HW_CONTROL, tmp);
1510
1511 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1512
1513 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1514 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1515 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1516 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1517
1518 WREG32(VGT_NUM_INSTANCES, 1);
1519
1520 WREG32(CP_PERFMON_CNTL, 0);
1521
1522 WREG32(SQ_CONFIG, 0);
1523
1524 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1525 FORCE_EOV_MAX_REZ_CNT(255)));
1526
1527 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1528 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1529
1530 WREG32(VGT_GS_VERTEX_REUSE, 16);
1531 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1532
1533 tmp = RREG32(HDP_MISC_CNTL);
1534 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1535 WREG32(HDP_MISC_CNTL, tmp);
1536
1537 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1538 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1539
1540 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1541 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1542
1543 udelay(50);
1544}
1545
2cae3bc3
AD
1546/*
1547 * GPU scratch register helper functions.
1548 */
1549/**
1550 * cik_scratch_init - setup driver info for CP scratch regs
1551 *
1552 * @rdev: radeon_device pointer
1553 *
1554 * Set up the number and offset of the CP scratch registers.
1555 * NOTE: use of CP scratch registers is a legacy inferface and
1556 * is not used by default on newer asics (r6xx+). On newer asics,
1557 * memory buffers are used for fences rather than scratch regs.
1558 */
1559static void cik_scratch_init(struct radeon_device *rdev)
1560{
1561 int i;
1562
1563 rdev->scratch.num_reg = 7;
1564 rdev->scratch.reg_base = SCRATCH_REG0;
1565 for (i = 0; i < rdev->scratch.num_reg; i++) {
1566 rdev->scratch.free[i] = true;
1567 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1568 }
1569}
1570
fbc832c7
AD
1571/**
1572 * cik_ring_test - basic gfx ring test
1573 *
1574 * @rdev: radeon_device pointer
1575 * @ring: radeon_ring structure holding ring information
1576 *
1577 * Allocate a scratch register and write to it using the gfx ring (CIK).
1578 * Provides a basic gfx ring test to verify that the ring is working.
1579 * Used by cik_cp_gfx_resume();
1580 * Returns 0 on success, error on failure.
1581 */
1582int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1583{
1584 uint32_t scratch;
1585 uint32_t tmp = 0;
1586 unsigned i;
1587 int r;
1588
1589 r = radeon_scratch_get(rdev, &scratch);
1590 if (r) {
1591 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1592 return r;
1593 }
1594 WREG32(scratch, 0xCAFEDEAD);
1595 r = radeon_ring_lock(rdev, ring, 3);
1596 if (r) {
1597 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1598 radeon_scratch_free(rdev, scratch);
1599 return r;
1600 }
1601 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1602 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1603 radeon_ring_write(ring, 0xDEADBEEF);
1604 radeon_ring_unlock_commit(rdev, ring);
1605 for (i = 0; i < rdev->usec_timeout; i++) {
1606 tmp = RREG32(scratch);
1607 if (tmp == 0xDEADBEEF)
1608 break;
1609 DRM_UDELAY(1);
1610 }
1611 if (i < rdev->usec_timeout) {
1612 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1613 } else {
1614 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1615 ring->idx, scratch, tmp);
1616 r = -EINVAL;
1617 }
1618 radeon_scratch_free(rdev, scratch);
1619 return r;
1620}
1621
2cae3bc3
AD
1622/**
1623 * cik_fence_ring_emit - emit a fence on the gfx ring
1624 *
1625 * @rdev: radeon_device pointer
1626 * @fence: radeon fence object
1627 *
1628 * Emits a fence sequnce number on the gfx ring and flushes
1629 * GPU caches.
1630 */
1631void cik_fence_ring_emit(struct radeon_device *rdev,
1632 struct radeon_fence *fence)
1633{
1634 struct radeon_ring *ring = &rdev->ring[fence->ring];
1635 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1636
1637 /* EVENT_WRITE_EOP - flush caches, send int */
1638 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1639 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1640 EOP_TC_ACTION_EN |
1641 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1642 EVENT_INDEX(5)));
1643 radeon_ring_write(ring, addr & 0xfffffffc);
1644 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1645 radeon_ring_write(ring, fence->seq);
1646 radeon_ring_write(ring, 0);
1647 /* HDP flush */
1648 /* We should be using the new WAIT_REG_MEM special op packet here
1649 * but it causes the CP to hang
1650 */
1651 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1652 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1653 WRITE_DATA_DST_SEL(0)));
1654 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1655 radeon_ring_write(ring, 0);
1656 radeon_ring_write(ring, 0);
1657}
1658
1659void cik_semaphore_ring_emit(struct radeon_device *rdev,
1660 struct radeon_ring *ring,
1661 struct radeon_semaphore *semaphore,
1662 bool emit_wait)
1663{
1664 uint64_t addr = semaphore->gpu_addr;
1665 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1666
1667 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1668 radeon_ring_write(ring, addr & 0xffffffff);
1669 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1670}
1671
1672/*
1673 * IB stuff
1674 */
1675/**
1676 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1677 *
1678 * @rdev: radeon_device pointer
1679 * @ib: radeon indirect buffer object
1680 *
1681 * Emits an DE (drawing engine) or CE (constant engine) IB
1682 * on the gfx ring. IBs are usually generated by userspace
1683 * acceleration drivers and submitted to the kernel for
1684 * sheduling on the ring. This function schedules the IB
1685 * on the gfx ring for execution by the GPU.
1686 */
1687void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1688{
1689 struct radeon_ring *ring = &rdev->ring[ib->ring];
1690 u32 header, control = INDIRECT_BUFFER_VALID;
1691
1692 if (ib->is_const_ib) {
1693 /* set switch buffer packet before const IB */
1694 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1695 radeon_ring_write(ring, 0);
1696
1697 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1698 } else {
1699 u32 next_rptr;
1700 if (ring->rptr_save_reg) {
1701 next_rptr = ring->wptr + 3 + 4;
1702 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1703 radeon_ring_write(ring, ((ring->rptr_save_reg -
1704 PACKET3_SET_UCONFIG_REG_START) >> 2));
1705 radeon_ring_write(ring, next_rptr);
1706 } else if (rdev->wb.enabled) {
1707 next_rptr = ring->wptr + 5 + 4;
1708 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1709 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1710 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1711 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1712 radeon_ring_write(ring, next_rptr);
1713 }
1714
1715 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1716 }
1717
1718 control |= ib->length_dw |
1719 (ib->vm ? (ib->vm->id << 24) : 0);
1720
1721 radeon_ring_write(ring, header);
1722 radeon_ring_write(ring,
1723#ifdef __BIG_ENDIAN
1724 (2 << 0) |
1725#endif
1726 (ib->gpu_addr & 0xFFFFFFFC));
1727 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1728 radeon_ring_write(ring, control);
1729}
1730
fbc832c7
AD
1731/**
1732 * cik_ib_test - basic gfx ring IB test
1733 *
1734 * @rdev: radeon_device pointer
1735 * @ring: radeon_ring structure holding ring information
1736 *
1737 * Allocate an IB and execute it on the gfx ring (CIK).
1738 * Provides a basic gfx ring test to verify that IBs are working.
1739 * Returns 0 on success, error on failure.
1740 */
1741int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1742{
1743 struct radeon_ib ib;
1744 uint32_t scratch;
1745 uint32_t tmp = 0;
1746 unsigned i;
1747 int r;
1748
1749 r = radeon_scratch_get(rdev, &scratch);
1750 if (r) {
1751 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1752 return r;
1753 }
1754 WREG32(scratch, 0xCAFEDEAD);
1755 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1756 if (r) {
1757 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1758 return r;
1759 }
1760 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1761 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1762 ib.ptr[2] = 0xDEADBEEF;
1763 ib.length_dw = 3;
1764 r = radeon_ib_schedule(rdev, &ib, NULL);
1765 if (r) {
1766 radeon_scratch_free(rdev, scratch);
1767 radeon_ib_free(rdev, &ib);
1768 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1769 return r;
1770 }
1771 r = radeon_fence_wait(ib.fence, false);
1772 if (r) {
1773 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
1774 return r;
1775 }
1776 for (i = 0; i < rdev->usec_timeout; i++) {
1777 tmp = RREG32(scratch);
1778 if (tmp == 0xDEADBEEF)
1779 break;
1780 DRM_UDELAY(1);
1781 }
1782 if (i < rdev->usec_timeout) {
1783 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1784 } else {
1785 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1786 scratch, tmp);
1787 r = -EINVAL;
1788 }
1789 radeon_scratch_free(rdev, scratch);
1790 radeon_ib_free(rdev, &ib);
1791 return r;
1792}
1793
841cf442
AD
1794/*
1795 * CP.
1796 * On CIK, gfx and compute now have independent command processors.
1797 *
1798 * GFX
1799 * Gfx consists of a single ring and can process both gfx jobs and
1800 * compute jobs. The gfx CP consists of three microengines (ME):
1801 * PFP - Pre-Fetch Parser
1802 * ME - Micro Engine
1803 * CE - Constant Engine
1804 * The PFP and ME make up what is considered the Drawing Engine (DE).
1805 * The CE is an asynchronous engine used for updating buffer descriptors
1806 * used by the DE so that they can be loaded into cache in parallel
1807 * while the DE is processing state update packets.
1808 *
1809 * Compute
1810 * The compute CP consists of two microengines (ME):
1811 * MEC1 - Compute MicroEngine 1
1812 * MEC2 - Compute MicroEngine 2
1813 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1814 * The queues are exposed to userspace and are programmed directly
1815 * by the compute runtime.
1816 */
1817/**
1818 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1819 *
1820 * @rdev: radeon_device pointer
1821 * @enable: enable or disable the MEs
1822 *
1823 * Halts or unhalts the gfx MEs.
1824 */
1825static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1826{
1827 if (enable)
1828 WREG32(CP_ME_CNTL, 0);
1829 else {
1830 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1831 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1832 }
1833 udelay(50);
1834}
1835
1836/**
1837 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1838 *
1839 * @rdev: radeon_device pointer
1840 *
1841 * Loads the gfx PFP, ME, and CE ucode.
1842 * Returns 0 for success, -EINVAL if the ucode is not available.
1843 */
1844static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1845{
1846 const __be32 *fw_data;
1847 int i;
1848
1849 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1850 return -EINVAL;
1851
1852 cik_cp_gfx_enable(rdev, false);
1853
1854 /* PFP */
1855 fw_data = (const __be32 *)rdev->pfp_fw->data;
1856 WREG32(CP_PFP_UCODE_ADDR, 0);
1857 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1858 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1859 WREG32(CP_PFP_UCODE_ADDR, 0);
1860
1861 /* CE */
1862 fw_data = (const __be32 *)rdev->ce_fw->data;
1863 WREG32(CP_CE_UCODE_ADDR, 0);
1864 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1865 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1866 WREG32(CP_CE_UCODE_ADDR, 0);
1867
1868 /* ME */
1869 fw_data = (const __be32 *)rdev->me_fw->data;
1870 WREG32(CP_ME_RAM_WADDR, 0);
1871 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1872 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1873 WREG32(CP_ME_RAM_WADDR, 0);
1874
1875 WREG32(CP_PFP_UCODE_ADDR, 0);
1876 WREG32(CP_CE_UCODE_ADDR, 0);
1877 WREG32(CP_ME_RAM_WADDR, 0);
1878 WREG32(CP_ME_RAM_RADDR, 0);
1879 return 0;
1880}
1881
1882/**
1883 * cik_cp_gfx_start - start the gfx ring
1884 *
1885 * @rdev: radeon_device pointer
1886 *
1887 * Enables the ring and loads the clear state context and other
1888 * packets required to init the ring.
1889 * Returns 0 for success, error for failure.
1890 */
1891static int cik_cp_gfx_start(struct radeon_device *rdev)
1892{
1893 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1894 int r, i;
1895
1896 /* init the CP */
1897 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1898 WREG32(CP_ENDIAN_SWAP, 0);
1899 WREG32(CP_DEVICE_ID, 1);
1900
1901 cik_cp_gfx_enable(rdev, true);
1902
1903 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1904 if (r) {
1905 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1906 return r;
1907 }
1908
1909 /* init the CE partitions. CE only used for gfx on CIK */
1910 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1911 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1912 radeon_ring_write(ring, 0xc000);
1913 radeon_ring_write(ring, 0xc000);
1914
1915 /* setup clear context state */
1916 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1917 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1918
1919 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1920 radeon_ring_write(ring, 0x80000000);
1921 radeon_ring_write(ring, 0x80000000);
1922
1923 for (i = 0; i < cik_default_size; i++)
1924 radeon_ring_write(ring, cik_default_state[i]);
1925
1926 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1927 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1928
1929 /* set clear context state */
1930 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1931 radeon_ring_write(ring, 0);
1932
1933 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1934 radeon_ring_write(ring, 0x00000316);
1935 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1936 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1937
1938 radeon_ring_unlock_commit(rdev, ring);
1939
1940 return 0;
1941}
1942
1943/**
1944 * cik_cp_gfx_fini - stop the gfx ring
1945 *
1946 * @rdev: radeon_device pointer
1947 *
1948 * Stop the gfx ring and tear down the driver ring
1949 * info.
1950 */
1951static void cik_cp_gfx_fini(struct radeon_device *rdev)
1952{
1953 cik_cp_gfx_enable(rdev, false);
1954 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1955}
1956
1957/**
1958 * cik_cp_gfx_resume - setup the gfx ring buffer registers
1959 *
1960 * @rdev: radeon_device pointer
1961 *
1962 * Program the location and size of the gfx ring buffer
1963 * and test it to make sure it's working.
1964 * Returns 0 for success, error for failure.
1965 */
1966static int cik_cp_gfx_resume(struct radeon_device *rdev)
1967{
1968 struct radeon_ring *ring;
1969 u32 tmp;
1970 u32 rb_bufsz;
1971 u64 rb_addr;
1972 int r;
1973
1974 WREG32(CP_SEM_WAIT_TIMER, 0x0);
1975 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1976
1977 /* Set the write pointer delay */
1978 WREG32(CP_RB_WPTR_DELAY, 0);
1979
1980 /* set the RB to use vmid 0 */
1981 WREG32(CP_RB_VMID, 0);
1982
1983 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1984
1985 /* ring 0 - compute and gfx */
1986 /* Set ring buffer size */
1987 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1988 rb_bufsz = drm_order(ring->ring_size / 8);
1989 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1990#ifdef __BIG_ENDIAN
1991 tmp |= BUF_SWAP_32BIT;
1992#endif
1993 WREG32(CP_RB0_CNTL, tmp);
1994
1995 /* Initialize the ring buffer's read and write pointers */
1996 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
1997 ring->wptr = 0;
1998 WREG32(CP_RB0_WPTR, ring->wptr);
1999
2000 /* set the wb address wether it's enabled or not */
2001 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2002 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2003
2004 /* scratch register shadowing is no longer supported */
2005 WREG32(SCRATCH_UMSK, 0);
2006
2007 if (!rdev->wb.enabled)
2008 tmp |= RB_NO_UPDATE;
2009
2010 mdelay(1);
2011 WREG32(CP_RB0_CNTL, tmp);
2012
2013 rb_addr = ring->gpu_addr >> 8;
2014 WREG32(CP_RB0_BASE, rb_addr);
2015 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2016
2017 ring->rptr = RREG32(CP_RB0_RPTR);
2018
2019 /* start the ring */
2020 cik_cp_gfx_start(rdev);
2021 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2022 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2023 if (r) {
2024 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2025 return r;
2026 }
2027 return 0;
2028}
2029
2030/**
2031 * cik_cp_compute_enable - enable/disable the compute CP MEs
2032 *
2033 * @rdev: radeon_device pointer
2034 * @enable: enable or disable the MEs
2035 *
2036 * Halts or unhalts the compute MEs.
2037 */
2038static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2039{
2040 if (enable)
2041 WREG32(CP_MEC_CNTL, 0);
2042 else
2043 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2044 udelay(50);
2045}
2046
2047/**
2048 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2049 *
2050 * @rdev: radeon_device pointer
2051 *
2052 * Loads the compute MEC1&2 ucode.
2053 * Returns 0 for success, -EINVAL if the ucode is not available.
2054 */
2055static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2056{
2057 const __be32 *fw_data;
2058 int i;
2059
2060 if (!rdev->mec_fw)
2061 return -EINVAL;
2062
2063 cik_cp_compute_enable(rdev, false);
2064
2065 /* MEC1 */
2066 fw_data = (const __be32 *)rdev->mec_fw->data;
2067 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2068 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2069 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2070 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2071
2072 if (rdev->family == CHIP_KAVERI) {
2073 /* MEC2 */
2074 fw_data = (const __be32 *)rdev->mec_fw->data;
2075 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2076 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2077 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2078 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2079 }
2080
2081 return 0;
2082}
2083
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Placeholder: compute queue start-up is not implemented yet,
 * so this currently does nothing.
 * Returns 0.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* todo */
	return 0;
}
2097
2098/**
2099 * cik_cp_compute_fini - stop the compute queues
2100 *
2101 * @rdev: radeon_device pointer
2102 *
2103 * Stop the compute queues and tear down the driver queue
2104 * info.
2105 */
2106static void cik_cp_compute_fini(struct radeon_device *rdev)
2107{
2108 cik_cp_compute_enable(rdev, false);
2109 //todo
2110}
2111
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Currently only forwards to cik_cp_compute_start(); programming
 * and testing of the compute queues is still to be done.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	/* todo */
	return cik_cp_compute_start(rdev);
}
2131
2132/* XXX temporary wrappers to handle both compute and gfx */
2133/* XXX */
2134static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2135{
2136 cik_cp_gfx_enable(rdev, enable);
2137 cik_cp_compute_enable(rdev, enable);
2138}
2139
/* XXX: load the gfx CP ucode, then the compute CP ucode; stops at the first error */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
2154
/* XXX: tear down the gfx CP, then the compute CP */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
2161
2162/* XXX */
2163static int cik_cp_resume(struct radeon_device *rdev)
2164{
2165 int r;
2166
2167 /* Reset all cp blocks */
2168 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2169 RREG32(GRBM_SOFT_RESET);
2170 mdelay(15);
2171 WREG32(GRBM_SOFT_RESET, 0);
2172 RREG32(GRBM_SOFT_RESET);
2173
2174 r = cik_cp_load_microcode(rdev);
2175 if (r)
2176 return r;
2177
2178 r = cik_cp_gfx_resume(rdev);
2179 if (r)
2180 return r;
2181 r = cik_cp_compute_resume(rdev);
2182 if (r)
2183 return r;
2184
2185 return 0;
2186}
2187
21a93e13
AD
2188/*
2189 * sDMA - System DMA
2190 * Starting with CIK, the GPU has new asynchronous
2191 * DMA engines. These engines are used for compute
2192 * and gfx. There are two DMA engines (SDMA0, SDMA1)
2193 * and each one supports 1 ring buffer used for gfx
2194 * and 2 queues used for compute.
2195 *
2196 * The programming model is very similar to the CP
2197 * (ring buffer, IBs, etc.), but sDMA has its own
2198 * packet format that is different from the PM4 format
2199 * used by the CP. sDMA supports copying data, writing
2200 * embedded data, solid fills, and a number of other
2201 * things. It also has support for tiling/detiling of
2202 * buffers.
2203 */
2204/**
2205 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2206 *
2207 * @rdev: radeon_device pointer
2208 * @ib: IB object to schedule
2209 *
2210 * Schedule an IB in the DMA ring (CIK).
2211 */
2212void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2213 struct radeon_ib *ib)
2214{
2215 struct radeon_ring *ring = &rdev->ring[ib->ring];
2216 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2217
2218 if (rdev->wb.enabled) {
2219 u32 next_rptr = ring->wptr + 5;
2220 while ((next_rptr & 7) != 4)
2221 next_rptr++;
2222 next_rptr += 4;
2223 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2224 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2225 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2226 radeon_ring_write(ring, 1); /* number of DWs to follow */
2227 radeon_ring_write(ring, next_rptr);
2228 }
2229
2230 /* IB packet must end on a 8 DW boundary */
2231 while ((ring->wptr & 7) != 4)
2232 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2233 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2234 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2235 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2236 radeon_ring_write(ring, ib->length_dw);
2237
2238}
2239
2240/**
2241 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2242 *
2243 * @rdev: radeon_device pointer
2244 * @fence: radeon fence object
2245 *
2246 * Add a DMA fence packet to the ring to write
2247 * the fence seq number and DMA trap packet to generate
2248 * an interrupt if needed (CIK).
2249 */
2250void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2251 struct radeon_fence *fence)
2252{
2253 struct radeon_ring *ring = &rdev->ring[fence->ring];
2254 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2255 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2256 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2257 u32 ref_and_mask;
2258
2259 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2260 ref_and_mask = SDMA0;
2261 else
2262 ref_and_mask = SDMA1;
2263
2264 /* write the fence */
2265 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2266 radeon_ring_write(ring, addr & 0xffffffff);
2267 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2268 radeon_ring_write(ring, fence->seq);
2269 /* generate an interrupt */
2270 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2271 /* flush HDP */
2272 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2273 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2274 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2275 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2276 radeon_ring_write(ring, ref_and_mask); /* MASK */
2277 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2278}
2279
2280/**
2281 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2282 *
2283 * @rdev: radeon_device pointer
2284 * @ring: radeon_ring structure holding ring information
2285 * @semaphore: radeon semaphore object
2286 * @emit_wait: wait or signal semaphore
2287 *
2288 * Add a DMA semaphore packet to the ring wait on or signal
2289 * other rings (CIK).
2290 */
2291void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2292 struct radeon_ring *ring,
2293 struct radeon_semaphore *semaphore,
2294 bool emit_wait)
2295{
2296 u64 addr = semaphore->gpu_addr;
2297 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2298
2299 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2300 radeon_ring_write(ring, addr & 0xfffffff8);
2301 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2302}
2303
2304/**
2305 * cik_sdma_gfx_stop - stop the gfx async dma engines
2306 *
2307 * @rdev: radeon_device pointer
2308 *
2309 * Stop the gfx async dma ring buffers (CIK).
2310 */
2311static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2312{
2313 u32 rb_cntl, reg_offset;
2314 int i;
2315
2316 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2317
2318 for (i = 0; i < 2; i++) {
2319 if (i == 0)
2320 reg_offset = SDMA0_REGISTER_OFFSET;
2321 else
2322 reg_offset = SDMA1_REGISTER_OFFSET;
2323 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2324 rb_cntl &= ~SDMA_RB_ENABLE;
2325 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2326 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2327 }
2328}
2329
/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Placeholder for stopping the compute async dma queues (CIK);
 * currently does nothing.
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}
2341
2342/**
2343 * cik_sdma_enable - stop the async dma engines
2344 *
2345 * @rdev: radeon_device pointer
2346 * @enable: enable/disable the DMA MEs.
2347 *
2348 * Halt or unhalt the async dma engines (CIK).
2349 */
2350static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2351{
2352 u32 me_cntl, reg_offset;
2353 int i;
2354
2355 for (i = 0; i < 2; i++) {
2356 if (i == 0)
2357 reg_offset = SDMA0_REGISTER_OFFSET;
2358 else
2359 reg_offset = SDMA1_REGISTER_OFFSET;
2360 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2361 if (enable)
2362 me_cntl &= ~SDMA_HALT;
2363 else
2364 me_cntl |= SDMA_HALT;
2365 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2366 }
2367}
2368
2369/**
2370 * cik_sdma_gfx_resume - setup and start the async dma engines
2371 *
2372 * @rdev: radeon_device pointer
2373 *
2374 * Set up the gfx DMA ring buffers and enable them (CIK).
2375 * Returns 0 for success, error for failure.
2376 */
2377static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2378{
2379 struct radeon_ring *ring;
2380 u32 rb_cntl, ib_cntl;
2381 u32 rb_bufsz;
2382 u32 reg_offset, wb_offset;
2383 int i, r;
2384
2385 for (i = 0; i < 2; i++) {
2386 if (i == 0) {
2387 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2388 reg_offset = SDMA0_REGISTER_OFFSET;
2389 wb_offset = R600_WB_DMA_RPTR_OFFSET;
2390 } else {
2391 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2392 reg_offset = SDMA1_REGISTER_OFFSET;
2393 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2394 }
2395
2396 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2397 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2398
2399 /* Set ring buffer size in dwords */
2400 rb_bufsz = drm_order(ring->ring_size / 4);
2401 rb_cntl = rb_bufsz << 1;
2402#ifdef __BIG_ENDIAN
2403 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2404#endif
2405 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2406
2407 /* Initialize the ring buffer's read and write pointers */
2408 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2409 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2410
2411 /* set the wb address whether it's enabled or not */
2412 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2413 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2414 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2415 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2416
2417 if (rdev->wb.enabled)
2418 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2419
2420 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2421 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2422
2423 ring->wptr = 0;
2424 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2425
2426 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2427
2428 /* enable DMA RB */
2429 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2430
2431 ib_cntl = SDMA_IB_ENABLE;
2432#ifdef __BIG_ENDIAN
2433 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2434#endif
2435 /* enable DMA IBs */
2436 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2437
2438 ring->ready = true;
2439
2440 r = radeon_ring_test(rdev, ring->idx, ring);
2441 if (r) {
2442 ring->ready = false;
2443 return r;
2444 }
2445 }
2446
2447 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2448
2449 return 0;
2450}
2451
/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Placeholder for setting up and enabling the compute DMA
 * queues (CIK); currently does nothing.
 * Returns 0.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}
2465
2466/**
2467 * cik_sdma_load_microcode - load the sDMA ME ucode
2468 *
2469 * @rdev: radeon_device pointer
2470 *
2471 * Loads the sDMA0/1 ucode.
2472 * Returns 0 for success, -EINVAL if the ucode is not available.
2473 */
2474static int cik_sdma_load_microcode(struct radeon_device *rdev)
2475{
2476 const __be32 *fw_data;
2477 int i;
2478
2479 if (!rdev->sdma_fw)
2480 return -EINVAL;
2481
2482 /* stop the gfx rings and rlc compute queues */
2483 cik_sdma_gfx_stop(rdev);
2484 cik_sdma_rlc_stop(rdev);
2485
2486 /* halt the MEs */
2487 cik_sdma_enable(rdev, false);
2488
2489 /* sdma0 */
2490 fw_data = (const __be32 *)rdev->sdma_fw->data;
2491 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2492 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2493 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2494 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2495
2496 /* sdma1 */
2497 fw_data = (const __be32 *)rdev->sdma_fw->data;
2498 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2499 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2500 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2501 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2502
2503 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2504 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2505 return 0;
2506}
2507
2508/**
2509 * cik_sdma_resume - setup and start the async dma engines
2510 *
2511 * @rdev: radeon_device pointer
2512 *
2513 * Set up the DMA engines and enable them (CIK).
2514 * Returns 0 for success, error for failure.
2515 */
2516static int cik_sdma_resume(struct radeon_device *rdev)
2517{
2518 int r;
2519
2520 /* Reset dma */
2521 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
2522 RREG32(SRBM_SOFT_RESET);
2523 udelay(50);
2524 WREG32(SRBM_SOFT_RESET, 0);
2525 RREG32(SRBM_SOFT_RESET);
2526
2527 r = cik_sdma_load_microcode(rdev);
2528 if (r)
2529 return r;
2530
2531 /* unhalt the MEs */
2532 cik_sdma_enable(rdev, true);
2533
2534 /* start the gfx rings and rlc compute queues */
2535 r = cik_sdma_gfx_resume(rdev);
2536 if (r)
2537 return r;
2538 r = cik_sdma_rlc_resume(rdev);
2539 if (r)
2540 return r;
2541
2542 return 0;
2543}
2544
2545/**
2546 * cik_sdma_fini - tear down the async dma engines
2547 *
2548 * @rdev: radeon_device pointer
2549 *
2550 * Stop the async dma engines and free the rings (CIK).
2551 */
2552static void cik_sdma_fini(struct radeon_device *rdev)
2553{
2554 /* stop the gfx rings and rlc compute queues */
2555 cik_sdma_gfx_stop(rdev);
2556 cik_sdma_rlc_stop(rdev);
2557 /* halt the MEs */
2558 cik_sdma_enable(rdev, false);
2559 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2560 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
2561 /* XXX - compute dma queue tear down */
2562}
2563
2564/**
2565 * cik_copy_dma - copy pages using the DMA engine
2566 *
2567 * @rdev: radeon_device pointer
2568 * @src_offset: src GPU address
2569 * @dst_offset: dst GPU address
2570 * @num_gpu_pages: number of GPU pages to xfer
2571 * @fence: radeon fence object
2572 *
2573 * Copy GPU paging using the DMA engine (CIK).
2574 * Used by the radeon ttm implementation to move pages if
2575 * registered as the asic copy callback.
2576 */
2577int cik_copy_dma(struct radeon_device *rdev,
2578 uint64_t src_offset, uint64_t dst_offset,
2579 unsigned num_gpu_pages,
2580 struct radeon_fence **fence)
2581{
2582 struct radeon_semaphore *sem = NULL;
2583 int ring_index = rdev->asic->copy.dma_ring_index;
2584 struct radeon_ring *ring = &rdev->ring[ring_index];
2585 u32 size_in_bytes, cur_size_in_bytes;
2586 int i, num_loops;
2587 int r = 0;
2588
2589 r = radeon_semaphore_create(rdev, &sem);
2590 if (r) {
2591 DRM_ERROR("radeon: moving bo (%d).\n", r);
2592 return r;
2593 }
2594
2595 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
2596 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
2597 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
2598 if (r) {
2599 DRM_ERROR("radeon: moving bo (%d).\n", r);
2600 radeon_semaphore_free(rdev, &sem, NULL);
2601 return r;
2602 }
2603
2604 if (radeon_fence_need_sync(*fence, ring->idx)) {
2605 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
2606 ring->idx);
2607 radeon_fence_note_sync(*fence, ring->idx);
2608 } else {
2609 radeon_semaphore_free(rdev, &sem, NULL);
2610 }
2611
2612 for (i = 0; i < num_loops; i++) {
2613 cur_size_in_bytes = size_in_bytes;
2614 if (cur_size_in_bytes > 0x1fffff)
2615 cur_size_in_bytes = 0x1fffff;
2616 size_in_bytes -= cur_size_in_bytes;
2617 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
2618 radeon_ring_write(ring, cur_size_in_bytes);
2619 radeon_ring_write(ring, 0); /* src/dst endian swap */
2620 radeon_ring_write(ring, src_offset & 0xffffffff);
2621 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
2622 radeon_ring_write(ring, dst_offset & 0xfffffffc);
2623 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
2624 src_offset += cur_size_in_bytes;
2625 dst_offset += cur_size_in_bytes;
2626 }
2627
2628 r = radeon_fence_emit(rdev, fence, ring->idx);
2629 if (r) {
2630 radeon_ring_unlock_undo(rdev, ring);
2631 return r;
2632 }
2633
2634 radeon_ring_unlock_commit(rdev, ring);
2635 radeon_semaphore_free(rdev, &sem, *fence);
2636
2637 return r;
2638}
2639
2640/**
2641 * cik_sdma_ring_test - simple async dma engine test
2642 *
2643 * @rdev: radeon_device pointer
2644 * @ring: radeon_ring structure holding ring information
2645 *
2646 * Test the DMA engine by writing using it to write an
2647 * value to memory. (CIK).
2648 * Returns 0 for success, error for failure.
2649 */
2650int cik_sdma_ring_test(struct radeon_device *rdev,
2651 struct radeon_ring *ring)
2652{
2653 unsigned i;
2654 int r;
2655 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2656 u32 tmp;
2657
2658 if (!ptr) {
2659 DRM_ERROR("invalid vram scratch pointer\n");
2660 return -EINVAL;
2661 }
2662
2663 tmp = 0xCAFEDEAD;
2664 writel(tmp, ptr);
2665
2666 r = radeon_ring_lock(rdev, ring, 4);
2667 if (r) {
2668 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2669 return r;
2670 }
2671 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2672 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2673 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
2674 radeon_ring_write(ring, 1); /* number of DWs to follow */
2675 radeon_ring_write(ring, 0xDEADBEEF);
2676 radeon_ring_unlock_commit(rdev, ring);
2677
2678 for (i = 0; i < rdev->usec_timeout; i++) {
2679 tmp = readl(ptr);
2680 if (tmp == 0xDEADBEEF)
2681 break;
2682 DRM_UDELAY(1);
2683 }
2684
2685 if (i < rdev->usec_timeout) {
2686 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2687 } else {
2688 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2689 ring->idx, tmp);
2690 r = -EINVAL;
2691 }
2692 return r;
2693}
2694
2695/**
2696 * cik_sdma_ib_test - test an IB on the DMA engine
2697 *
2698 * @rdev: radeon_device pointer
2699 * @ring: radeon_ring structure holding ring information
2700 *
2701 * Test a simple IB in the DMA ring (CIK).
2702 * Returns 0 on success, error on failure.
2703 */
2704int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2705{
2706 struct radeon_ib ib;
2707 unsigned i;
2708 int r;
2709 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2710 u32 tmp = 0;
2711
2712 if (!ptr) {
2713 DRM_ERROR("invalid vram scratch pointer\n");
2714 return -EINVAL;
2715 }
2716
2717 tmp = 0xCAFEDEAD;
2718 writel(tmp, ptr);
2719
2720 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2721 if (r) {
2722 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2723 return r;
2724 }
2725
2726 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2727 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
2728 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
2729 ib.ptr[3] = 1;
2730 ib.ptr[4] = 0xDEADBEEF;
2731 ib.length_dw = 5;
2732
2733 r = radeon_ib_schedule(rdev, &ib, NULL);
2734 if (r) {
2735 radeon_ib_free(rdev, &ib);
2736 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2737 return r;
2738 }
2739 r = radeon_fence_wait(ib.fence, false);
2740 if (r) {
2741 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2742 return r;
2743 }
2744 for (i = 0; i < rdev->usec_timeout; i++) {
2745 tmp = readl(ptr);
2746 if (tmp == 0xDEADBEEF)
2747 break;
2748 DRM_UDELAY(1);
2749 }
2750 if (i < rdev->usec_timeout) {
2751 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2752 } else {
2753 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
2754 r = -EINVAL;
2755 }
2756 radeon_ib_free(rdev, &ib);
2757 return r;
2758}
2759
6f2043ce 2760
cc066715 2761static void cik_print_gpu_status_regs(struct radeon_device *rdev)
6f2043ce 2762{
6f2043ce
AD
2763 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2764 RREG32(GRBM_STATUS));
2765 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2766 RREG32(GRBM_STATUS2));
2767 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2768 RREG32(GRBM_STATUS_SE0));
2769 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2770 RREG32(GRBM_STATUS_SE1));
2771 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2772 RREG32(GRBM_STATUS_SE2));
2773 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2774 RREG32(GRBM_STATUS_SE3));
2775 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2776 RREG32(SRBM_STATUS));
2777 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2778 RREG32(SRBM_STATUS2));
cc066715
AD
2779 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
2780 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
2781 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
2782 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
2783}
6f2043ce 2784
cc066715
AD
2785/**
2786 * cik_gpu_check_soft_reset - check which blocks are busy
2787 *
2788 * @rdev: radeon_device pointer
2789 *
2790 * Check which blocks are busy and return the relevant reset
2791 * mask to be used by cik_gpu_soft_reset().
2792 * Returns a mask of the blocks to be reset.
2793 */
2794static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
2795{
2796 u32 reset_mask = 0;
2797 u32 tmp;
6f2043ce 2798
cc066715
AD
2799 /* GRBM_STATUS */
2800 tmp = RREG32(GRBM_STATUS);
2801 if (tmp & (PA_BUSY | SC_BUSY |
2802 BCI_BUSY | SX_BUSY |
2803 TA_BUSY | VGT_BUSY |
2804 DB_BUSY | CB_BUSY |
2805 GDS_BUSY | SPI_BUSY |
2806 IA_BUSY | IA_BUSY_NO_DMA))
2807 reset_mask |= RADEON_RESET_GFX;
2808
2809 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
2810 reset_mask |= RADEON_RESET_CP;
2811
2812 /* GRBM_STATUS2 */
2813 tmp = RREG32(GRBM_STATUS2);
2814 if (tmp & RLC_BUSY)
2815 reset_mask |= RADEON_RESET_RLC;
2816
2817 /* SDMA0_STATUS_REG */
2818 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
2819 if (!(tmp & SDMA_IDLE))
2820 reset_mask |= RADEON_RESET_DMA;
2821
2822 /* SDMA1_STATUS_REG */
2823 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
2824 if (!(tmp & SDMA_IDLE))
2825 reset_mask |= RADEON_RESET_DMA1;
2826
2827 /* SRBM_STATUS2 */
2828 tmp = RREG32(SRBM_STATUS2);
2829 if (tmp & SDMA_BUSY)
2830 reset_mask |= RADEON_RESET_DMA;
2831
2832 if (tmp & SDMA1_BUSY)
2833 reset_mask |= RADEON_RESET_DMA1;
2834
2835 /* SRBM_STATUS */
2836 tmp = RREG32(SRBM_STATUS);
2837
2838 if (tmp & IH_BUSY)
2839 reset_mask |= RADEON_RESET_IH;
2840
2841 if (tmp & SEM_BUSY)
2842 reset_mask |= RADEON_RESET_SEM;
2843
2844 if (tmp & GRBM_RQ_PENDING)
2845 reset_mask |= RADEON_RESET_GRBM;
2846
2847 if (tmp & VMC_BUSY)
2848 reset_mask |= RADEON_RESET_VMC;
2849
2850 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
2851 MCC_BUSY | MCD_BUSY))
2852 reset_mask |= RADEON_RESET_MC;
2853
2854 if (evergreen_is_display_hung(rdev))
2855 reset_mask |= RADEON_RESET_DISPLAY;
2856
2857 /* Skip MC reset as it's mostly likely not hung, just busy */
2858 if (reset_mask & RADEON_RESET_MC) {
2859 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
2860 reset_mask &= ~RADEON_RESET_MC;
2861 }
2862
2863 return reset_mask;
6f2043ce
AD
2864}
2865
2866/**
cc066715 2867 * cik_gpu_soft_reset - soft reset GPU
6f2043ce
AD
2868 *
2869 * @rdev: radeon_device pointer
cc066715 2870 * @reset_mask: mask of which blocks to reset
6f2043ce 2871 *
cc066715 2872 * Soft reset the blocks specified in @reset_mask.
6f2043ce 2873 */
cc066715 2874static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
6f2043ce
AD
2875{
2876 struct evergreen_mc_save save;
cc066715
AD
2877 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
2878 u32 tmp;
2879
2880 if (reset_mask == 0)
2881 return;
2882
2883 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
2884
2885 cik_print_gpu_status_regs(rdev);
2886 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
2887 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
2888 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
2889 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
2890
2891 /* stop the rlc */
2892 cik_rlc_stop(rdev);
2893
2894 /* Disable GFX parsing/prefetching */
2895 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2896
2897 /* Disable MEC parsing/prefetching */
2898 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2899
2900 if (reset_mask & RADEON_RESET_DMA) {
2901 /* sdma0 */
2902 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
2903 tmp |= SDMA_HALT;
2904 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
2905 }
2906 if (reset_mask & RADEON_RESET_DMA1) {
2907 /* sdma1 */
2908 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
2909 tmp |= SDMA_HALT;
2910 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
2911 }
6f2043ce 2912
6f2043ce 2913 evergreen_mc_stop(rdev, &save);
cc066715 2914 if (evergreen_mc_wait_for_idle(rdev)) {
6f2043ce
AD
2915 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2916 }
6f2043ce 2917
cc066715
AD
2918 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
2919 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
2920
2921 if (reset_mask & RADEON_RESET_CP) {
2922 grbm_soft_reset |= SOFT_RESET_CP;
2923
2924 srbm_soft_reset |= SOFT_RESET_GRBM;
2925 }
2926
2927 if (reset_mask & RADEON_RESET_DMA)
2928 srbm_soft_reset |= SOFT_RESET_SDMA;
2929
2930 if (reset_mask & RADEON_RESET_DMA1)
2931 srbm_soft_reset |= SOFT_RESET_SDMA1;
2932
2933 if (reset_mask & RADEON_RESET_DISPLAY)
2934 srbm_soft_reset |= SOFT_RESET_DC;
2935
2936 if (reset_mask & RADEON_RESET_RLC)
2937 grbm_soft_reset |= SOFT_RESET_RLC;
2938
2939 if (reset_mask & RADEON_RESET_SEM)
2940 srbm_soft_reset |= SOFT_RESET_SEM;
2941
2942 if (reset_mask & RADEON_RESET_IH)
2943 srbm_soft_reset |= SOFT_RESET_IH;
2944
2945 if (reset_mask & RADEON_RESET_GRBM)
2946 srbm_soft_reset |= SOFT_RESET_GRBM;
2947
2948 if (reset_mask & RADEON_RESET_VMC)
2949 srbm_soft_reset |= SOFT_RESET_VMC;
2950
2951 if (!(rdev->flags & RADEON_IS_IGP)) {
2952 if (reset_mask & RADEON_RESET_MC)
2953 srbm_soft_reset |= SOFT_RESET_MC;
2954 }
2955
2956 if (grbm_soft_reset) {
2957 tmp = RREG32(GRBM_SOFT_RESET);
2958 tmp |= grbm_soft_reset;
2959 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
2960 WREG32(GRBM_SOFT_RESET, tmp);
2961 tmp = RREG32(GRBM_SOFT_RESET);
2962
2963 udelay(50);
2964
2965 tmp &= ~grbm_soft_reset;
2966 WREG32(GRBM_SOFT_RESET, tmp);
2967 tmp = RREG32(GRBM_SOFT_RESET);
2968 }
2969
2970 if (srbm_soft_reset) {
2971 tmp = RREG32(SRBM_SOFT_RESET);
2972 tmp |= srbm_soft_reset;
2973 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
2974 WREG32(SRBM_SOFT_RESET, tmp);
2975 tmp = RREG32(SRBM_SOFT_RESET);
2976
2977 udelay(50);
2978
2979 tmp &= ~srbm_soft_reset;
2980 WREG32(SRBM_SOFT_RESET, tmp);
2981 tmp = RREG32(SRBM_SOFT_RESET);
2982 }
6f2043ce 2983
6f2043ce
AD
2984 /* Wait a little for things to settle down */
2985 udelay(50);
cc066715 2986
6f2043ce 2987 evergreen_mc_resume(rdev, &save);
cc066715
AD
2988 udelay(50);
2989
2990 cik_print_gpu_status_regs(rdev);
6f2043ce
AD
2991}
2992
2993/**
cc066715 2994 * cik_asic_reset - soft reset GPU
6f2043ce
AD
2995 *
2996 * @rdev: radeon_device pointer
2997 *
cc066715
AD
2998 * Look up which blocks are hung and attempt
2999 * to reset them.
6f2043ce
AD
3000 * Returns 0 for success.
3001 */
3002int cik_asic_reset(struct radeon_device *rdev)
3003{
cc066715 3004 u32 reset_mask;
6f2043ce 3005
cc066715
AD
3006 reset_mask = cik_gpu_check_soft_reset(rdev);
3007
3008 if (reset_mask)
3009 r600_set_bios_scratch_engine_hung(rdev, true);
3010
3011 cik_gpu_soft_reset(rdev, reset_mask);
6f2043ce 3012
cc066715
AD
3013 reset_mask = cik_gpu_check_soft_reset(rdev);
3014
3015 if (!reset_mask)
3016 r600_set_bios_scratch_engine_hung(rdev, false);
3017
3018 return 0;
3019}
3020
3021/**
3022 * cik_gfx_is_lockup - check if the 3D engine is locked up
3023 *
3024 * @rdev: radeon_device pointer
3025 * @ring: radeon_ring structure holding ring information
3026 *
3027 * Check if the 3D engine is locked up (CIK).
3028 * Returns true if the engine is locked, false if not.
3029 */
3030bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3031{
3032 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3033
3034 if (!(reset_mask & (RADEON_RESET_GFX |
3035 RADEON_RESET_COMPUTE |
3036 RADEON_RESET_CP))) {
3037 radeon_ring_lockup_update(ring);
3038 return false;
3039 }
3040 /* force CP activities */
3041 radeon_ring_force_activity(rdev, ring);
3042 return radeon_ring_test_lockup(rdev, ring);
6f2043ce 3043}
1c49165d 3044
21a93e13
AD
3045/**
3046 * cik_sdma_is_lockup - Check if the DMA engine is locked up
3047 *
3048 * @rdev: radeon_device pointer
3049 * @ring: radeon_ring structure holding ring information
3050 *
3051 * Check if the async DMA engine is locked up (CIK).
3052 * Returns true if the engine appears to be locked up, false if not.
3053 */
3054bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3055{
cc066715
AD
3056 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3057 u32 mask;
21a93e13
AD
3058
3059 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
cc066715 3060 mask = RADEON_RESET_DMA;
21a93e13 3061 else
cc066715
AD
3062 mask = RADEON_RESET_DMA1;
3063
3064 if (!(reset_mask & mask)) {
21a93e13
AD
3065 radeon_ring_lockup_update(ring);
3066 return false;
3067 }
3068 /* force ring activities */
3069 radeon_ring_force_activity(rdev, ring);
3070 return radeon_ring_test_lockup(rdev, ring);
3071}
3072
1c49165d
AD
3073/* MC */
3074/**
3075 * cik_mc_program - program the GPU memory controller
3076 *
3077 * @rdev: radeon_device pointer
3078 *
3079 * Set the location of vram, gart, and AGP in the GPU's
3080 * physical address space (CIK).
3081 */
3082static void cik_mc_program(struct radeon_device *rdev)
3083{
3084 struct evergreen_mc_save save;
3085 u32 tmp;
3086 int i, j;
3087
3088 /* Initialize HDP */
3089 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3090 WREG32((0x2c14 + j), 0x00000000);
3091 WREG32((0x2c18 + j), 0x00000000);
3092 WREG32((0x2c1c + j), 0x00000000);
3093 WREG32((0x2c20 + j), 0x00000000);
3094 WREG32((0x2c24 + j), 0x00000000);
3095 }
3096 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3097
3098 evergreen_mc_stop(rdev, &save);
3099 if (radeon_mc_wait_for_idle(rdev)) {
3100 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3101 }
3102 /* Lockout access through VGA aperture*/
3103 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3104 /* Update configuration */
3105 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3106 rdev->mc.vram_start >> 12);
3107 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3108 rdev->mc.vram_end >> 12);
3109 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3110 rdev->vram_scratch.gpu_addr >> 12);
3111 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3112 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3113 WREG32(MC_VM_FB_LOCATION, tmp);
3114 /* XXX double check these! */
3115 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3116 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3117 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3118 WREG32(MC_VM_AGP_BASE, 0);
3119 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3120 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3121 if (radeon_mc_wait_for_idle(rdev)) {
3122 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3123 }
3124 evergreen_mc_resume(rdev, &save);
3125 /* we need to own VRAM, so turn off the VGA renderer here
3126 * to stop it overwriting our objects */
3127 rv515_vga_render_disable(rdev);
3128}
3129
3130/**
3131 * cik_mc_init - initialize the memory controller driver params
3132 *
3133 * @rdev: radeon_device pointer
3134 *
3135 * Look up the amount of vram, vram width, and decide how to place
3136 * vram and gart within the GPU's physical address space (CIK).
3137 * Returns 0 for success.
3138 */
3139static int cik_mc_init(struct radeon_device *rdev)
3140{
3141 u32 tmp;
3142 int chansize, numchan;
3143
3144 /* Get VRAM informations */
3145 rdev->mc.vram_is_ddr = true;
3146 tmp = RREG32(MC_ARB_RAMCFG);
3147 if (tmp & CHANSIZE_MASK) {
3148 chansize = 64;
3149 } else {
3150 chansize = 32;
3151 }
3152 tmp = RREG32(MC_SHARED_CHMAP);
3153 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3154 case 0:
3155 default:
3156 numchan = 1;
3157 break;
3158 case 1:
3159 numchan = 2;
3160 break;
3161 case 2:
3162 numchan = 4;
3163 break;
3164 case 3:
3165 numchan = 8;
3166 break;
3167 case 4:
3168 numchan = 3;
3169 break;
3170 case 5:
3171 numchan = 6;
3172 break;
3173 case 6:
3174 numchan = 10;
3175 break;
3176 case 7:
3177 numchan = 12;
3178 break;
3179 case 8:
3180 numchan = 16;
3181 break;
3182 }
3183 rdev->mc.vram_width = numchan * chansize;
3184 /* Could aper size report 0 ? */
3185 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3186 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3187 /* size in MB on si */
3188 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3189 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3190 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3191 si_vram_gtt_location(rdev, &rdev->mc);
3192 radeon_update_bandwidth_info(rdev);
3193
3194 return 0;
3195}
3196
3197/*
3198 * GART
3199 * VMID 0 is the physical GPU addresses as used by the kernel.
3200 * VMIDs 1-15 are used for userspace clients and are handled
3201 * by the radeon vm/hsa code.
3202 */
3203/**
3204 * cik_pcie_gart_tlb_flush - gart tlb flush callback
3205 *
3206 * @rdev: radeon_device pointer
3207 *
3208 * Flush the TLB for the VMID 0 page table (CIK).
3209 */
3210void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3211{
3212 /* flush hdp cache */
3213 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3214
3215 /* bits 0-15 are the VM contexts0-15 */
3216 WREG32(VM_INVALIDATE_REQUEST, 0x1);
3217}
3218
3219/**
3220 * cik_pcie_gart_enable - gart enable
3221 *
3222 * @rdev: radeon_device pointer
3223 *
3224 * This sets up the TLBs, programs the page tables for VMID0,
3225 * sets up the hw for VMIDs 1-15 which are allocated on
3226 * demand, and sets up the global locations for the LDS, GDS,
3227 * and GPUVM for FSA64 clients (CIK).
3228 * Returns 0 for success, errors for failure.
3229 */
3230static int cik_pcie_gart_enable(struct radeon_device *rdev)
3231{
3232 int r, i;
3233
3234 if (rdev->gart.robj == NULL) {
3235 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3236 return -EINVAL;
3237 }
3238 r = radeon_gart_table_vram_pin(rdev);
3239 if (r)
3240 return r;
3241 radeon_gart_restore(rdev);
3242 /* Setup TLB control */
3243 WREG32(MC_VM_MX_L1_TLB_CNTL,
3244 (0xA << 7) |
3245 ENABLE_L1_TLB |
3246 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3247 ENABLE_ADVANCED_DRIVER_MODEL |
3248 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3249 /* Setup L2 cache */
3250 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3251 ENABLE_L2_FRAGMENT_PROCESSING |
3252 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3253 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3254 EFFECTIVE_L2_QUEUE_SIZE(7) |
3255 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3256 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3257 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3258 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3259 /* setup context0 */
3260 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3261 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3262 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3263 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3264 (u32)(rdev->dummy_page.addr >> 12));
3265 WREG32(VM_CONTEXT0_CNTL2, 0);
3266 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3267 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3268
3269 WREG32(0x15D4, 0);
3270 WREG32(0x15D8, 0);
3271 WREG32(0x15DC, 0);
3272
3273 /* empty context1-15 */
3274 /* FIXME start with 4G, once using 2 level pt switch to full
3275 * vm size space
3276 */
3277 /* set vm size, must be a multiple of 4 */
3278 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3279 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3280 for (i = 1; i < 16; i++) {
3281 if (i < 8)
3282 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3283 rdev->gart.table_addr >> 12);
3284 else
3285 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3286 rdev->gart.table_addr >> 12);
3287 }
3288
3289 /* enable context1-15 */
3290 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3291 (u32)(rdev->dummy_page.addr >> 12));
a00024b0 3292 WREG32(VM_CONTEXT1_CNTL2, 4);
1c49165d 3293 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
a00024b0
AD
3294 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3295 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3296 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3297 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3298 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3299 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3300 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3301 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3302 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3303 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3304 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3305 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
1c49165d
AD
3306
3307 /* TC cache setup ??? */
3308 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3309 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3310 WREG32(TC_CFG_L1_STORE_POLICY, 0);
3311
3312 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3313 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3314 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3315 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3316 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3317
3318 WREG32(TC_CFG_L1_VOLATILE, 0);
3319 WREG32(TC_CFG_L2_VOLATILE, 0);
3320
3321 if (rdev->family == CHIP_KAVERI) {
3322 u32 tmp = RREG32(CHUB_CONTROL);
3323 tmp &= ~BYPASS_VM;
3324 WREG32(CHUB_CONTROL, tmp);
3325 }
3326
3327 /* XXX SH_MEM regs */
3328 /* where to put LDS, scratch, GPUVM in FSA64 space */
3329 for (i = 0; i < 16; i++) {
3330 WREG32(SRBM_GFX_CNTL, VMID(i));
21a93e13 3331 /* CP and shaders */
1c49165d
AD
3332 WREG32(SH_MEM_CONFIG, 0);
3333 WREG32(SH_MEM_APE1_BASE, 1);
3334 WREG32(SH_MEM_APE1_LIMIT, 0);
3335 WREG32(SH_MEM_BASES, 0);
21a93e13
AD
3336 /* SDMA GFX */
3337 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3338 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3339 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3340 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3341 /* XXX SDMA RLC - todo */
1c49165d
AD
3342 }
3343 WREG32(SRBM_GFX_CNTL, 0);
3344
3345 cik_pcie_gart_tlb_flush(rdev);
3346 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3347 (unsigned)(rdev->mc.gtt_size >> 20),
3348 (unsigned long long)rdev->gart.table_addr);
3349 rdev->gart.ready = true;
3350 return 0;
3351}
3352
3353/**
3354 * cik_pcie_gart_disable - gart disable
3355 *
3356 * @rdev: radeon_device pointer
3357 *
3358 * This disables all VM page table (CIK).
3359 */
3360static void cik_pcie_gart_disable(struct radeon_device *rdev)
3361{
3362 /* Disable all tables */
3363 WREG32(VM_CONTEXT0_CNTL, 0);
3364 WREG32(VM_CONTEXT1_CNTL, 0);
3365 /* Setup TLB control */
3366 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3367 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3368 /* Setup L2 cache */
3369 WREG32(VM_L2_CNTL,
3370 ENABLE_L2_FRAGMENT_PROCESSING |
3371 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3372 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3373 EFFECTIVE_L2_QUEUE_SIZE(7) |
3374 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3375 WREG32(VM_L2_CNTL2, 0);
3376 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3377 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3378 radeon_gart_table_vram_unpin(rdev);
3379}
3380
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART in hardware first, then frees the VRAM-backed
 * table and finally tears down the common GART state (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware must stop using the table before it is released */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
3394
3395/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * CIK uses hw IB checking, so there is nothing to parse in software.
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nop: the IB is accepted as-is */
	return 0;
}
3408
3409/*
3410 * vm
3411 * VMID 0 is the physical GPU addresses as used by the kernel.
3412 * VMIDs 1-15 are used for userspace clients and are handled
3413 * by the radeon vm/hsa code.
3414 */
3415/**
3416 * cik_vm_init - cik vm init callback
3417 *
3418 * @rdev: radeon_device pointer
3419 *
3420 * Inits cik specific vm parameters (number of VMs, base of vram for
3421 * VMIDs 1-15) (CIK).
3422 * Returns 0 for success.
3423 */
3424int cik_vm_init(struct radeon_device *rdev)
3425{
3426 /* number of VMs */
3427 rdev->vm_manager.nvm = 16;
3428 /* base offset of vram pages */
3429 if (rdev->flags & RADEON_IS_IGP) {
3430 u64 tmp = RREG32(MC_VM_FB_OFFSET);
3431 tmp <<= 22;
3432 rdev->vm_manager.vram_base_offset = tmp;
3433 } else
3434 rdev->vm_manager.vram_base_offset = 0;
3435
3436 return 0;
3437}
3438
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Counterpart of cik_vm_init(); there is currently no asic specific
 * VM state to tear down, so this is a nop (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do */
}
3449
f96ab484
AD
3450/**
3451 * cik_vm_flush - cik vm flush using the CP
3452 *
3453 * @rdev: radeon_device pointer
3454 *
3455 * Update the page table base and flush the VM TLB
3456 * using the CP (CIK).
3457 */
3458void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3459{
3460 struct radeon_ring *ring = &rdev->ring[ridx];
3461
3462 if (vm == NULL)
3463 return;
3464
3465 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3466 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3467 WRITE_DATA_DST_SEL(0)));
3468 if (vm->id < 8) {
3469 radeon_ring_write(ring,
3470 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3471 } else {
3472 radeon_ring_write(ring,
3473 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3474 }
3475 radeon_ring_write(ring, 0);
3476 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3477
3478 /* update SH_MEM_* regs */
3479 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3480 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3481 WRITE_DATA_DST_SEL(0)));
3482 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3483 radeon_ring_write(ring, 0);
3484 radeon_ring_write(ring, VMID(vm->id));
3485
3486 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
3487 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3488 WRITE_DATA_DST_SEL(0)));
3489 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3490 radeon_ring_write(ring, 0);
3491
3492 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
3493 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
3494 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
3495 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
3496
3497 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3498 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3499 WRITE_DATA_DST_SEL(0)));
3500 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3501 radeon_ring_write(ring, 0);
3502 radeon_ring_write(ring, VMID(0));
3503
3504 /* HDP flush */
3505 /* We should be using the WAIT_REG_MEM packet here like in
3506 * cik_fence_ring_emit(), but it causes the CP to hang in this
3507 * context...
3508 */
3509 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3510 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3511 WRITE_DATA_DST_SEL(0)));
3512 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3513 radeon_ring_write(ring, 0);
3514 radeon_ring_write(ring, 0);
3515
3516 /* bits 0-15 are the VM contexts0-15 */
3517 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3518 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3519 WRITE_DATA_DST_SEL(0)));
3520 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3521 radeon_ring_write(ring, 0);
3522 radeon_ring_write(ring, 1 << vm->id);
3523
3524 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3525 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3526 radeon_ring_write(ring, 0x0);
3527}
3528
d0e092d9
AD
3529/**
3530 * cik_vm_set_page - update the page tables using sDMA
3531 *
3532 * @rdev: radeon_device pointer
3533 * @ib: indirect buffer to fill with commands
3534 * @pe: addr of the page entry
3535 * @addr: dst addr to write into pe
3536 * @count: number of page entries to update
3537 * @incr: increase next addr by incr bytes
3538 * @flags: access flags
3539 *
3540 * Update the page tables using CP or sDMA (CIK).
3541 */
3542void cik_vm_set_page(struct radeon_device *rdev,
3543 struct radeon_ib *ib,
3544 uint64_t pe,
3545 uint64_t addr, unsigned count,
3546 uint32_t incr, uint32_t flags)
3547{
3548 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3549 uint64_t value;
3550 unsigned ndw;
3551
3552 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3553 /* CP */
3554 while (count) {
3555 ndw = 2 + count * 2;
3556 if (ndw > 0x3FFE)
3557 ndw = 0x3FFE;
3558
3559 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3560 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3561 WRITE_DATA_DST_SEL(1));
3562 ib->ptr[ib->length_dw++] = pe;
3563 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3564 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3565 if (flags & RADEON_VM_PAGE_SYSTEM) {
3566 value = radeon_vm_map_gart(rdev, addr);
3567 value &= 0xFFFFFFFFFFFFF000ULL;
3568 } else if (flags & RADEON_VM_PAGE_VALID) {
3569 value = addr;
3570 } else {
3571 value = 0;
3572 }
3573 addr += incr;
3574 value |= r600_flags;
3575 ib->ptr[ib->length_dw++] = value;
3576 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3577 }
3578 }
3579 } else {
3580 /* DMA */
3581 if (flags & RADEON_VM_PAGE_SYSTEM) {
3582 while (count) {
3583 ndw = count * 2;
3584 if (ndw > 0xFFFFE)
3585 ndw = 0xFFFFE;
3586
3587 /* for non-physically contiguous pages (system) */
3588 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3589 ib->ptr[ib->length_dw++] = pe;
3590 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3591 ib->ptr[ib->length_dw++] = ndw;
3592 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3593 if (flags & RADEON_VM_PAGE_SYSTEM) {
3594 value = radeon_vm_map_gart(rdev, addr);
3595 value &= 0xFFFFFFFFFFFFF000ULL;
3596 } else if (flags & RADEON_VM_PAGE_VALID) {
3597 value = addr;
3598 } else {
3599 value = 0;
3600 }
3601 addr += incr;
3602 value |= r600_flags;
3603 ib->ptr[ib->length_dw++] = value;
3604 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3605 }
3606 }
3607 } else {
3608 while (count) {
3609 ndw = count;
3610 if (ndw > 0x7FFFF)
3611 ndw = 0x7FFFF;
3612
3613 if (flags & RADEON_VM_PAGE_VALID)
3614 value = addr;
3615 else
3616 value = 0;
3617 /* for physically contiguous pages (vram) */
3618 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
3619 ib->ptr[ib->length_dw++] = pe; /* dst addr */
3620 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3621 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3622 ib->ptr[ib->length_dw++] = 0;
3623 ib->ptr[ib->length_dw++] = value; /* value */
3624 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3625 ib->ptr[ib->length_dw++] = incr; /* increment size */
3626 ib->ptr[ib->length_dw++] = 0;
3627 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
3628 pe += ndw * 8;
3629 addr += ndw * incr;
3630 count -= ndw;
3631 }
3632 }
3633 while (ib->length_dw & 0x7)
3634 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
3635 }
3636}
3637
605de6b9
AD
3638/**
3639 * cik_dma_vm_flush - cik vm flush using sDMA
3640 *
3641 * @rdev: radeon_device pointer
3642 *
3643 * Update the page table base and flush the VM TLB
3644 * using sDMA (CIK).
3645 */
3646void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3647{
3648 struct radeon_ring *ring = &rdev->ring[ridx];
3649 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3650 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3651 u32 ref_and_mask;
3652
3653 if (vm == NULL)
3654 return;
3655
3656 if (ridx == R600_RING_TYPE_DMA_INDEX)
3657 ref_and_mask = SDMA0;
3658 else
3659 ref_and_mask = SDMA1;
3660
3661 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3662 if (vm->id < 8) {
3663 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3664 } else {
3665 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3666 }
3667 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3668
3669 /* update SH_MEM_* regs */
3670 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3671 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3672 radeon_ring_write(ring, VMID(vm->id));
3673
3674 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3675 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3676 radeon_ring_write(ring, 0);
3677
3678 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3679 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
3680 radeon_ring_write(ring, 0);
3681
3682 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3683 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
3684 radeon_ring_write(ring, 1);
3685
3686 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3687 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
3688 radeon_ring_write(ring, 0);
3689
3690 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3691 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3692 radeon_ring_write(ring, VMID(0));
3693
3694 /* flush HDP */
3695 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3696 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3697 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3698 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3699 radeon_ring_write(ring, ref_and_mask); /* MASK */
3700 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3701
3702 /* flush TLB */
3703 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3704 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3705 radeon_ring_write(ring, 1 << vm->id);
3706}
3707
f6796cae
AD
3708/*
3709 * RLC
3710 * The RLC is a multi-purpose microengine that handles a
3711 * variety of functions, the most important of which is
3712 * the interrupt controller.
3713 */
3714/**
3715 * cik_rlc_stop - stop the RLC ME
3716 *
3717 * @rdev: radeon_device pointer
3718 *
3719 * Halt the RLC ME (MicroEngine) (CIK).
3720 */
3721static void cik_rlc_stop(struct radeon_device *rdev)
3722{
3723 int i, j, k;
3724 u32 mask, tmp;
3725
3726 tmp = RREG32(CP_INT_CNTL_RING0);
3727 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3728 WREG32(CP_INT_CNTL_RING0, tmp);
3729
3730 RREG32(CB_CGTT_SCLK_CTRL);
3731 RREG32(CB_CGTT_SCLK_CTRL);
3732 RREG32(CB_CGTT_SCLK_CTRL);
3733 RREG32(CB_CGTT_SCLK_CTRL);
3734
3735 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3736 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
3737
3738 WREG32(RLC_CNTL, 0);
3739
3740 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3741 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3742 cik_select_se_sh(rdev, i, j);
3743 for (k = 0; k < rdev->usec_timeout; k++) {
3744 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
3745 break;
3746 udelay(1);
3747 }
3748 }
3749 }
3750 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3751
3752 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
3753 for (k = 0; k < rdev->usec_timeout; k++) {
3754 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3755 break;
3756 udelay(1);
3757 }
3758}
3759
3760/**
3761 * cik_rlc_start - start the RLC ME
3762 *
3763 * @rdev: radeon_device pointer
3764 *
3765 * Unhalt the RLC ME (MicroEngine) (CIK).
3766 */
3767static void cik_rlc_start(struct radeon_device *rdev)
3768{
3769 u32 tmp;
3770
3771 WREG32(RLC_CNTL, RLC_ENABLE);
3772
3773 tmp = RREG32(CP_INT_CNTL_RING0);
3774 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3775 WREG32(CP_INT_CNTL_RING0, tmp);
3776
3777 udelay(50);
3778}
3779
3780/**
3781 * cik_rlc_resume - setup the RLC hw
3782 *
3783 * @rdev: radeon_device pointer
3784 *
3785 * Initialize the RLC registers, load the ucode,
3786 * and start the RLC (CIK).
3787 * Returns 0 for success, -EINVAL if the ucode is not available.
3788 */
3789static int cik_rlc_resume(struct radeon_device *rdev)
3790{
3791 u32 i, size;
3792 u32 clear_state_info[3];
3793 const __be32 *fw_data;
3794
3795 if (!rdev->rlc_fw)
3796 return -EINVAL;
3797
3798 switch (rdev->family) {
3799 case CHIP_BONAIRE:
3800 default:
3801 size = BONAIRE_RLC_UCODE_SIZE;
3802 break;
3803 case CHIP_KAVERI:
3804 size = KV_RLC_UCODE_SIZE;
3805 break;
3806 case CHIP_KABINI:
3807 size = KB_RLC_UCODE_SIZE;
3808 break;
3809 }
3810
3811 cik_rlc_stop(rdev);
3812
3813 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
3814 RREG32(GRBM_SOFT_RESET);
3815 udelay(50);
3816 WREG32(GRBM_SOFT_RESET, 0);
3817 RREG32(GRBM_SOFT_RESET);
3818 udelay(50);
3819
3820 WREG32(RLC_LB_CNTR_INIT, 0);
3821 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
3822
3823 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3824 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
3825 WREG32(RLC_LB_PARAMS, 0x00600408);
3826 WREG32(RLC_LB_CNTL, 0x80000004);
3827
3828 WREG32(RLC_MC_CNTL, 0);
3829 WREG32(RLC_UCODE_CNTL, 0);
3830
3831 fw_data = (const __be32 *)rdev->rlc_fw->data;
3832 WREG32(RLC_GPM_UCODE_ADDR, 0);
3833 for (i = 0; i < size; i++)
3834 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
3835 WREG32(RLC_GPM_UCODE_ADDR, 0);
3836
3837 /* XXX */
3838 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
3839 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
3840 clear_state_info[2] = 0;//cik_default_size;
3841 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
3842 for (i = 0; i < 3; i++)
3843 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
3844 WREG32(RLC_DRIVER_DMA_STATUS, 0);
3845
3846 cik_rlc_start(rdev);
3847
3848 return 0;
3849}
a59781bb
AD
3850
3851/*
3852 * Interrupts
3853 * Starting with r6xx, interrupts are handled via a ring buffer.
3854 * Ring buffers are areas of GPU accessible memory that the GPU
3855 * writes interrupt vectors into and the host reads vectors out of.
3856 * There is a rptr (read pointer) that determines where the
3857 * host is currently reading, and a wptr (write pointer)
3858 * which determines where the GPU has written. When the
3859 * pointers are equal, the ring is idle. When the GPU
3860 * writes vectors to the ring buffer, it increments the
3861 * wptr. When there is an interrupt, the host then starts
3862 * fetching commands and processing them until the pointers are
3863 * equal again at which point it updates the rptr.
3864 */
3865
3866/**
3867 * cik_enable_interrupts - Enable the interrupt ring buffer
3868 *
3869 * @rdev: radeon_device pointer
3870 *
3871 * Enable the interrupt ring buffer (CIK).
3872 */
3873static void cik_enable_interrupts(struct radeon_device *rdev)
3874{
3875 u32 ih_cntl = RREG32(IH_CNTL);
3876 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3877
3878 ih_cntl |= ENABLE_INTR;
3879 ih_rb_cntl |= IH_RB_ENABLE;
3880 WREG32(IH_CNTL, ih_cntl);
3881 WREG32(IH_RB_CNTL, ih_rb_cntl);
3882 rdev->ih.enabled = true;
3883}
3884
3885/**
3886 * cik_disable_interrupts - Disable the interrupt ring buffer
3887 *
3888 * @rdev: radeon_device pointer
3889 *
3890 * Disable the interrupt ring buffer (CIK).
3891 */
3892static void cik_disable_interrupts(struct radeon_device *rdev)
3893{
3894 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3895 u32 ih_cntl = RREG32(IH_CNTL);
3896
3897 ih_rb_cntl &= ~IH_RB_ENABLE;
3898 ih_cntl &= ~ENABLE_INTR;
3899 WREG32(IH_RB_CNTL, ih_rb_cntl);
3900 WREG32(IH_CNTL, ih_cntl);
3901 /* set rptr, wptr to 0 */
3902 WREG32(IH_RB_RPTR, 0);
3903 WREG32(IH_RB_WPTR, 0);
3904 rdev->ih.enabled = false;
3905 rdev->ih.rptr = 0;
3906}
3907
3908/**
3909 * cik_disable_interrupt_state - Disable all interrupt sources
3910 *
3911 * @rdev: radeon_device pointer
3912 *
3913 * Clear all interrupt enable bits used by the driver (CIK).
3914 */
3915static void cik_disable_interrupt_state(struct radeon_device *rdev)
3916{
3917 u32 tmp;
3918
3919 /* gfx ring */
3920 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
21a93e13
AD
3921 /* sdma */
3922 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3923 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3924 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3925 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
a59781bb
AD
3926 /* compute queues */
3927 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
3928 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
3929 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
3930 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
3931 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
3932 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
3933 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
3934 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
3935 /* grbm */
3936 WREG32(GRBM_INT_CNTL, 0);
3937 /* vline/vblank, etc. */
3938 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3939 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3940 if (rdev->num_crtc >= 4) {
3941 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3942 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3943 }
3944 if (rdev->num_crtc >= 6) {
3945 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3946 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3947 }
3948
3949 /* dac hotplug */
3950 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
3951
3952 /* digital hotplug */
3953 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3954 WREG32(DC_HPD1_INT_CONTROL, tmp);
3955 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3956 WREG32(DC_HPD2_INT_CONTROL, tmp);
3957 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3958 WREG32(DC_HPD3_INT_CONTROL, tmp);
3959 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3960 WREG32(DC_HPD4_INT_CONTROL, tmp);
3961 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3962 WREG32(DC_HPD5_INT_CONTROL, tmp);
3963 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3964 WREG32(DC_HPD6_INT_CONTROL, tmp);
3965
3966}
3967
3968/**
3969 * cik_irq_init - init and enable the interrupt ring
3970 *
3971 * @rdev: radeon_device pointer
3972 *
3973 * Allocate a ring buffer for the interrupt controller,
3974 * enable the RLC, disable interrupts, enable the IH
3975 * ring buffer and enable it (CIK).
3976 * Called at device load and reume.
3977 * Returns 0 for success, errors for failure.
3978 */
3979static int cik_irq_init(struct radeon_device *rdev)
3980{
3981 int ret = 0;
3982 int rb_bufsz;
3983 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3984
3985 /* allocate ring */
3986 ret = r600_ih_ring_alloc(rdev);
3987 if (ret)
3988 return ret;
3989
3990 /* disable irqs */
3991 cik_disable_interrupts(rdev);
3992
3993 /* init rlc */
3994 ret = cik_rlc_resume(rdev);
3995 if (ret) {
3996 r600_ih_ring_fini(rdev);
3997 return ret;
3998 }
3999
4000 /* setup interrupt control */
4001 /* XXX this should actually be a bus address, not an MC address. same on older asics */
4002 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4003 interrupt_cntl = RREG32(INTERRUPT_CNTL);
4004 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4005 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4006 */
4007 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4008 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4009 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4010 WREG32(INTERRUPT_CNTL, interrupt_cntl);
4011
4012 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
4013 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4014
4015 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4016 IH_WPTR_OVERFLOW_CLEAR |
4017 (rb_bufsz << 1));
4018
4019 if (rdev->wb.enabled)
4020 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4021
4022 /* set the writeback address whether it's enabled or not */
4023 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4024 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4025
4026 WREG32(IH_RB_CNTL, ih_rb_cntl);
4027
4028 /* set rptr, wptr to 0 */
4029 WREG32(IH_RB_RPTR, 0);
4030 WREG32(IH_RB_WPTR, 0);
4031
4032 /* Default settings for IH_CNTL (disabled at first) */
4033 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4034 /* RPTR_REARM only works if msi's are enabled */
4035 if (rdev->msi_enabled)
4036 ih_cntl |= RPTR_REARM;
4037 WREG32(IH_CNTL, ih_cntl);
4038
4039 /* force the active interrupt state to all disabled */
4040 cik_disable_interrupt_state(rdev);
4041
4042 pci_set_master(rdev->pdev);
4043
4044 /* enable irqs */
4045 cik_enable_interrupts(rdev);
4046
4047 return ret;
4048}
4049
4050/**
4051 * cik_irq_set - enable/disable interrupt sources
4052 *
4053 * @rdev: radeon_device pointer
4054 *
4055 * Enable interrupt sources on the GPU (vblanks, hpd,
4056 * etc.) (CIK).
4057 * Returns 0 for success, errors for failure.
4058 */
4059int cik_irq_set(struct radeon_device *rdev)
4060{
4061 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
4062 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
4063 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4064 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4065 u32 grbm_int_cntl = 0;
21a93e13 4066 u32 dma_cntl, dma_cntl1;
a59781bb
AD
4067
4068 if (!rdev->irq.installed) {
4069 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4070 return -EINVAL;
4071 }
4072 /* don't enable anything if the ih is disabled */
4073 if (!rdev->ih.enabled) {
4074 cik_disable_interrupts(rdev);
4075 /* force the active interrupt state to all disabled */
4076 cik_disable_interrupt_state(rdev);
4077 return 0;
4078 }
4079
4080 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4081 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4082 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4083 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4084 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4085 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4086
21a93e13
AD
4087 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4088 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4089
a59781bb
AD
4090 /* enable CP interrupts on all rings */
4091 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4092 DRM_DEBUG("cik_irq_set: sw int gfx\n");
4093 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4094 }
4095 /* TODO: compute queues! */
4096 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
4097
21a93e13
AD
4098 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4099 DRM_DEBUG("cik_irq_set: sw int dma\n");
4100 dma_cntl |= TRAP_ENABLE;
4101 }
4102
4103 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4104 DRM_DEBUG("cik_irq_set: sw int dma1\n");
4105 dma_cntl1 |= TRAP_ENABLE;
4106 }
4107
a59781bb
AD
4108 if (rdev->irq.crtc_vblank_int[0] ||
4109 atomic_read(&rdev->irq.pflip[0])) {
4110 DRM_DEBUG("cik_irq_set: vblank 0\n");
4111 crtc1 |= VBLANK_INTERRUPT_MASK;
4112 }
4113 if (rdev->irq.crtc_vblank_int[1] ||
4114 atomic_read(&rdev->irq.pflip[1])) {
4115 DRM_DEBUG("cik_irq_set: vblank 1\n");
4116 crtc2 |= VBLANK_INTERRUPT_MASK;
4117 }
4118 if (rdev->irq.crtc_vblank_int[2] ||
4119 atomic_read(&rdev->irq.pflip[2])) {
4120 DRM_DEBUG("cik_irq_set: vblank 2\n");
4121 crtc3 |= VBLANK_INTERRUPT_MASK;
4122 }
4123 if (rdev->irq.crtc_vblank_int[3] ||
4124 atomic_read(&rdev->irq.pflip[3])) {
4125 DRM_DEBUG("cik_irq_set: vblank 3\n");
4126 crtc4 |= VBLANK_INTERRUPT_MASK;
4127 }
4128 if (rdev->irq.crtc_vblank_int[4] ||
4129 atomic_read(&rdev->irq.pflip[4])) {
4130 DRM_DEBUG("cik_irq_set: vblank 4\n");
4131 crtc5 |= VBLANK_INTERRUPT_MASK;
4132 }
4133 if (rdev->irq.crtc_vblank_int[5] ||
4134 atomic_read(&rdev->irq.pflip[5])) {
4135 DRM_DEBUG("cik_irq_set: vblank 5\n");
4136 crtc6 |= VBLANK_INTERRUPT_MASK;
4137 }
4138 if (rdev->irq.hpd[0]) {
4139 DRM_DEBUG("cik_irq_set: hpd 1\n");
4140 hpd1 |= DC_HPDx_INT_EN;
4141 }
4142 if (rdev->irq.hpd[1]) {
4143 DRM_DEBUG("cik_irq_set: hpd 2\n");
4144 hpd2 |= DC_HPDx_INT_EN;
4145 }
4146 if (rdev->irq.hpd[2]) {
4147 DRM_DEBUG("cik_irq_set: hpd 3\n");
4148 hpd3 |= DC_HPDx_INT_EN;
4149 }
4150 if (rdev->irq.hpd[3]) {
4151 DRM_DEBUG("cik_irq_set: hpd 4\n");
4152 hpd4 |= DC_HPDx_INT_EN;
4153 }
4154 if (rdev->irq.hpd[4]) {
4155 DRM_DEBUG("cik_irq_set: hpd 5\n");
4156 hpd5 |= DC_HPDx_INT_EN;
4157 }
4158 if (rdev->irq.hpd[5]) {
4159 DRM_DEBUG("cik_irq_set: hpd 6\n");
4160 hpd6 |= DC_HPDx_INT_EN;
4161 }
4162
4163 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4164
21a93e13
AD
4165 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4166 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4167
a59781bb
AD
4168 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4169
4170 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4171 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4172 if (rdev->num_crtc >= 4) {
4173 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4174 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4175 }
4176 if (rdev->num_crtc >= 6) {
4177 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4178 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4179 }
4180
4181 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4182 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4183 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4184 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4185 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4186 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4187
4188 return 0;
4189}
4190
4191/**
4192 * cik_irq_ack - ack interrupt sources
4193 *
4194 * @rdev: radeon_device pointer
4195 *
4196 * Ack interrupt sources on the GPU (vblanks, hpd,
4197 * etc.) (CIK). Certain interrupts sources are sw
4198 * generated and do not require an explicit ack.
4199 */
4200static inline void cik_irq_ack(struct radeon_device *rdev)
4201{
4202 u32 tmp;
4203
4204 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4205 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4206 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4207 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4208 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4209 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4210 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4211
4212 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4213 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4214 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4215 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4216 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4217 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4218 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4219 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4220
4221 if (rdev->num_crtc >= 4) {
4222 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4223 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4224 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4225 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4226 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4227 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4228 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4229 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4230 }
4231
4232 if (rdev->num_crtc >= 6) {
4233 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4234 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4235 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4236 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4237 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4238 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4239 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4240 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4241 }
4242
4243 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4244 tmp = RREG32(DC_HPD1_INT_CONTROL);
4245 tmp |= DC_HPDx_INT_ACK;
4246 WREG32(DC_HPD1_INT_CONTROL, tmp);
4247 }
4248 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4249 tmp = RREG32(DC_HPD2_INT_CONTROL);
4250 tmp |= DC_HPDx_INT_ACK;
4251 WREG32(DC_HPD2_INT_CONTROL, tmp);
4252 }
4253 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4254 tmp = RREG32(DC_HPD3_INT_CONTROL);
4255 tmp |= DC_HPDx_INT_ACK;
4256 WREG32(DC_HPD3_INT_CONTROL, tmp);
4257 }
4258 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4259 tmp = RREG32(DC_HPD4_INT_CONTROL);
4260 tmp |= DC_HPDx_INT_ACK;
4261 WREG32(DC_HPD4_INT_CONTROL, tmp);
4262 }
4263 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4264 tmp = RREG32(DC_HPD5_INT_CONTROL);
4265 tmp |= DC_HPDx_INT_ACK;
4266 WREG32(DC_HPD5_INT_CONTROL, tmp);
4267 }
4268 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4269 tmp = RREG32(DC_HPD5_INT_CONTROL);
4270 tmp |= DC_HPDx_INT_ACK;
4271 WREG32(DC_HPD6_INT_CONTROL, tmp);
4272 }
4273}
4274
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw, ack whatever is still pending
 * after a short delay and force the interrupt state to
 * disabled (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);
}
4290
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts via cik_irq_disable() and then stop
 * the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts first, then the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
4304
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Run the suspend sequence (disable interrupts, stop the RLC)
 * and then free the IH ring buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before releasing the ring it writes to */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
4319
4320/**
4321 * cik_get_ih_wptr - get the IH ring buffer wptr
4322 *
4323 * @rdev: radeon_device pointer
4324 *
4325 * Get the IH ring buffer wptr from either the register
4326 * or the writeback memory buffer (CIK). Also check for
4327 * ring buffer overflow and deal with it.
4328 * Used by cik_irq_process().
4329 * Returns the value of the wptr.
4330 */
4331static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
4332{
4333 u32 wptr, tmp;
4334
4335 if (rdev->wb.enabled)
4336 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4337 else
4338 wptr = RREG32(IH_RB_WPTR);
4339
4340 if (wptr & RB_OVERFLOW) {
4341 /* When a ring buffer overflow happen start parsing interrupt
4342 * from the last not overwritten vector (wptr + 16). Hopefully
4343 * this should allow us to catchup.
4344 */
4345 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4346 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4347 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4348 tmp = RREG32(IH_RB_CNTL);
4349 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4350 WREG32(IH_RB_CNTL, tmp);
4351 }
4352 return (wptr & rdev->ih.ptr_mask);
4353}
4354
4355/* CIK IV Ring
4356 * Each IV ring entry is 128 bits:
4357 * [7:0] - interrupt source id
4358 * [31:8] - reserved
4359 * [59:32] - interrupt source data
4360 * [63:60] - reserved
21a93e13
AD
4361 * [71:64] - RINGID
4362 * CP:
4363 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
a59781bb
AD
4364 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
4365 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
4366 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
4367 * PIPE_ID - ME0 0=3D
4368 * - ME1&2 compute dispatcher (4 pipes each)
21a93e13
AD
4369 * SDMA:
4370 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
4371 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
4372 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
a59781bb
AD
4373 * [79:72] - VMID
4374 * [95:80] - PASID
4375 * [127:96] - reserved
4376 */
4377/**
4378 * cik_irq_process - interrupt handler
4379 *
4380 * @rdev: radeon_device pointer
4381 *
4382 * Interrupt hander (CIK). Walk the IH ring,
4383 * ack interrupts and schedule work to handle
4384 * interrupt events.
4385 * Returns irq process return code.
4386 */
4387int cik_irq_process(struct radeon_device *rdev)
4388{
4389 u32 wptr;
4390 u32 rptr;
4391 u32 src_id, src_data, ring_id;
4392 u8 me_id, pipe_id, queue_id;
4393 u32 ring_index;
4394 bool queue_hotplug = false;
4395 bool queue_reset = false;
4396
4397 if (!rdev->ih.enabled || rdev->shutdown)
4398 return IRQ_NONE;
4399
4400 wptr = cik_get_ih_wptr(rdev);
4401
4402restart_ih:
4403 /* is somebody else already processing irqs? */
4404 if (atomic_xchg(&rdev->ih.lock, 1))
4405 return IRQ_NONE;
4406
4407 rptr = rdev->ih.rptr;
4408 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4409
4410 /* Order reading of wptr vs. reading of IH ring data */
4411 rmb();
4412
4413 /* display interrupts */
4414 cik_irq_ack(rdev);
4415
4416 while (rptr != wptr) {
4417 /* wptr/rptr are in bytes! */
4418 ring_index = rptr / 4;
4419 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4420 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4421 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
a59781bb
AD
4422
4423 switch (src_id) {
4424 case 1: /* D1 vblank/vline */
4425 switch (src_data) {
4426 case 0: /* D1 vblank */
4427 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
4428 if (rdev->irq.crtc_vblank_int[0]) {
4429 drm_handle_vblank(rdev->ddev, 0);
4430 rdev->pm.vblank_sync = true;
4431 wake_up(&rdev->irq.vblank_queue);
4432 }
4433 if (atomic_read(&rdev->irq.pflip[0]))
4434 radeon_crtc_handle_flip(rdev, 0);
4435 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4436 DRM_DEBUG("IH: D1 vblank\n");
4437 }
4438 break;
4439 case 1: /* D1 vline */
4440 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
4441 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4442 DRM_DEBUG("IH: D1 vline\n");
4443 }
4444 break;
4445 default:
4446 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4447 break;
4448 }
4449 break;
4450 case 2: /* D2 vblank/vline */
4451 switch (src_data) {
4452 case 0: /* D2 vblank */
4453 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4454 if (rdev->irq.crtc_vblank_int[1]) {
4455 drm_handle_vblank(rdev->ddev, 1);
4456 rdev->pm.vblank_sync = true;
4457 wake_up(&rdev->irq.vblank_queue);
4458 }
4459 if (atomic_read(&rdev->irq.pflip[1]))
4460 radeon_crtc_handle_flip(rdev, 1);
4461 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4462 DRM_DEBUG("IH: D2 vblank\n");
4463 }
4464 break;
4465 case 1: /* D2 vline */
4466 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4467 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4468 DRM_DEBUG("IH: D2 vline\n");
4469 }
4470 break;
4471 default:
4472 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4473 break;
4474 }
4475 break;
4476 case 3: /* D3 vblank/vline */
4477 switch (src_data) {
4478 case 0: /* D3 vblank */
4479 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4480 if (rdev->irq.crtc_vblank_int[2]) {
4481 drm_handle_vblank(rdev->ddev, 2);
4482 rdev->pm.vblank_sync = true;
4483 wake_up(&rdev->irq.vblank_queue);
4484 }
4485 if (atomic_read(&rdev->irq.pflip[2]))
4486 radeon_crtc_handle_flip(rdev, 2);
4487 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4488 DRM_DEBUG("IH: D3 vblank\n");
4489 }
4490 break;
4491 case 1: /* D3 vline */
4492 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4493 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4494 DRM_DEBUG("IH: D3 vline\n");
4495 }
4496 break;
4497 default:
4498 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4499 break;
4500 }
4501 break;
4502 case 4: /* D4 vblank/vline */
4503 switch (src_data) {
4504 case 0: /* D4 vblank */
4505 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4506 if (rdev->irq.crtc_vblank_int[3]) {
4507 drm_handle_vblank(rdev->ddev, 3);
4508 rdev->pm.vblank_sync = true;
4509 wake_up(&rdev->irq.vblank_queue);
4510 }
4511 if (atomic_read(&rdev->irq.pflip[3]))
4512 radeon_crtc_handle_flip(rdev, 3);
4513 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4514 DRM_DEBUG("IH: D4 vblank\n");
4515 }
4516 break;
4517 case 1: /* D4 vline */
4518 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4519 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4520 DRM_DEBUG("IH: D4 vline\n");
4521 }
4522 break;
4523 default:
4524 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4525 break;
4526 }
4527 break;
4528 case 5: /* D5 vblank/vline */
4529 switch (src_data) {
4530 case 0: /* D5 vblank */
4531 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4532 if (rdev->irq.crtc_vblank_int[4]) {
4533 drm_handle_vblank(rdev->ddev, 4);
4534 rdev->pm.vblank_sync = true;
4535 wake_up(&rdev->irq.vblank_queue);
4536 }
4537 if (atomic_read(&rdev->irq.pflip[4]))
4538 radeon_crtc_handle_flip(rdev, 4);
4539 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4540 DRM_DEBUG("IH: D5 vblank\n");
4541 }
4542 break;
4543 case 1: /* D5 vline */
4544 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4545 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4546 DRM_DEBUG("IH: D5 vline\n");
4547 }
4548 break;
4549 default:
4550 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4551 break;
4552 }
4553 break;
4554 case 6: /* D6 vblank/vline */
4555 switch (src_data) {
4556 case 0: /* D6 vblank */
4557 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4558 if (rdev->irq.crtc_vblank_int[5]) {
4559 drm_handle_vblank(rdev->ddev, 5);
4560 rdev->pm.vblank_sync = true;
4561 wake_up(&rdev->irq.vblank_queue);
4562 }
4563 if (atomic_read(&rdev->irq.pflip[5]))
4564 radeon_crtc_handle_flip(rdev, 5);
4565 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4566 DRM_DEBUG("IH: D6 vblank\n");
4567 }
4568 break;
4569 case 1: /* D6 vline */
4570 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4571 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4572 DRM_DEBUG("IH: D6 vline\n");
4573 }
4574 break;
4575 default:
4576 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4577 break;
4578 }
4579 break;
4580 case 42: /* HPD hotplug */
4581 switch (src_data) {
4582 case 0:
4583 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4584 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
4585 queue_hotplug = true;
4586 DRM_DEBUG("IH: HPD1\n");
4587 }
4588 break;
4589 case 1:
4590 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4591 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4592 queue_hotplug = true;
4593 DRM_DEBUG("IH: HPD2\n");
4594 }
4595 break;
4596 case 2:
4597 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4598 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4599 queue_hotplug = true;
4600 DRM_DEBUG("IH: HPD3\n");
4601 }
4602 break;
4603 case 3:
4604 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4605 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4606 queue_hotplug = true;
4607 DRM_DEBUG("IH: HPD4\n");
4608 }
4609 break;
4610 case 4:
4611 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4612 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4613 queue_hotplug = true;
4614 DRM_DEBUG("IH: HPD5\n");
4615 }
4616 break;
4617 case 5:
4618 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4619 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4620 queue_hotplug = true;
4621 DRM_DEBUG("IH: HPD6\n");
4622 }
4623 break;
4624 default:
4625 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4626 break;
4627 }
4628 break;
9d97c99b
AD
4629 case 146:
4630 case 147:
4631 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4632 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4633 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4634 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4635 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4636 /* reset addr and status */
4637 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4638 break;
a59781bb
AD
4639 case 176: /* GFX RB CP_INT */
4640 case 177: /* GFX IB CP_INT */
4641 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4642 break;
4643 case 181: /* CP EOP event */
4644 DRM_DEBUG("IH: CP EOP\n");
21a93e13
AD
4645 /* XXX check the bitfield order! */
4646 me_id = (ring_id & 0x60) >> 5;
4647 pipe_id = (ring_id & 0x18) >> 3;
4648 queue_id = (ring_id & 0x7) >> 0;
a59781bb
AD
4649 switch (me_id) {
4650 case 0:
4651 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4652 break;
4653 case 1:
4654 /* XXX compute */
4655 break;
4656 case 2:
4657 /* XXX compute */
4658 break;
4659 }
4660 break;
4661 case 184: /* CP Privileged reg access */
4662 DRM_ERROR("Illegal register access in command stream\n");
4663 /* XXX check the bitfield order! */
4664 me_id = (ring_id & 0x60) >> 5;
4665 pipe_id = (ring_id & 0x18) >> 3;
4666 queue_id = (ring_id & 0x7) >> 0;
4667 switch (me_id) {
4668 case 0:
4669 /* This results in a full GPU reset, but all we need to do is soft
4670 * reset the CP for gfx
4671 */
4672 queue_reset = true;
4673 break;
4674 case 1:
4675 /* XXX compute */
4676 break;
4677 case 2:
4678 /* XXX compute */
4679 break;
4680 }
4681 break;
4682 case 185: /* CP Privileged inst */
4683 DRM_ERROR("Illegal instruction in command stream\n");
21a93e13
AD
4684 /* XXX check the bitfield order! */
4685 me_id = (ring_id & 0x60) >> 5;
4686 pipe_id = (ring_id & 0x18) >> 3;
4687 queue_id = (ring_id & 0x7) >> 0;
a59781bb
AD
4688 switch (me_id) {
4689 case 0:
4690 /* This results in a full GPU reset, but all we need to do is soft
4691 * reset the CP for gfx
4692 */
4693 queue_reset = true;
4694 break;
4695 case 1:
4696 /* XXX compute */
4697 break;
4698 case 2:
4699 /* XXX compute */
4700 break;
4701 }
4702 break;
21a93e13
AD
4703 case 224: /* SDMA trap event */
4704 /* XXX check the bitfield order! */
4705 me_id = (ring_id & 0x3) >> 0;
4706 queue_id = (ring_id & 0xc) >> 2;
4707 DRM_DEBUG("IH: SDMA trap\n");
4708 switch (me_id) {
4709 case 0:
4710 switch (queue_id) {
4711 case 0:
4712 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4713 break;
4714 case 1:
4715 /* XXX compute */
4716 break;
4717 case 2:
4718 /* XXX compute */
4719 break;
4720 }
4721 break;
4722 case 1:
4723 switch (queue_id) {
4724 case 0:
4725 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4726 break;
4727 case 1:
4728 /* XXX compute */
4729 break;
4730 case 2:
4731 /* XXX compute */
4732 break;
4733 }
4734 break;
4735 }
4736 break;
4737 case 241: /* SDMA Privileged inst */
4738 case 247: /* SDMA Privileged inst */
4739 DRM_ERROR("Illegal instruction in SDMA command stream\n");
4740 /* XXX check the bitfield order! */
4741 me_id = (ring_id & 0x3) >> 0;
4742 queue_id = (ring_id & 0xc) >> 2;
4743 switch (me_id) {
4744 case 0:
4745 switch (queue_id) {
4746 case 0:
4747 queue_reset = true;
4748 break;
4749 case 1:
4750 /* XXX compute */
4751 queue_reset = true;
4752 break;
4753 case 2:
4754 /* XXX compute */
4755 queue_reset = true;
4756 break;
4757 }
4758 break;
4759 case 1:
4760 switch (queue_id) {
4761 case 0:
4762 queue_reset = true;
4763 break;
4764 case 1:
4765 /* XXX compute */
4766 queue_reset = true;
4767 break;
4768 case 2:
4769 /* XXX compute */
4770 queue_reset = true;
4771 break;
4772 }
4773 break;
4774 }
4775 break;
a59781bb
AD
4776 case 233: /* GUI IDLE */
4777 DRM_DEBUG("IH: GUI idle\n");
4778 break;
4779 default:
4780 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4781 break;
4782 }
4783
4784 /* wptr/rptr are in bytes! */
4785 rptr += 16;
4786 rptr &= rdev->ih.ptr_mask;
4787 }
4788 if (queue_hotplug)
4789 schedule_work(&rdev->hotplug_work);
4790 if (queue_reset)
4791 schedule_work(&rdev->reset_work);
4792 rdev->ih.rptr = rptr;
4793 WREG32(IH_RB_RPTR, rdev->ih.rptr);
4794 atomic_set(&rdev->ih.lock, 0);
4795
4796 /* make sure wptr hasn't changed while processing */
4797 wptr = cik_get_ih_wptr(rdev);
4798 if (wptr != rptr)
4799 goto restart_ih;
4800
4801 return IRQ_HANDLED;
4802}
7bf94a2c
AD
4803
4804/*
4805 * startup/shutdown callbacks
4806 */
4807/**
4808 * cik_startup - program the asic to a functional state
4809 *
4810 * @rdev: radeon_device pointer
4811 *
4812 * Programs the asic to a functional state (CIK).
4813 * Called by cik_init() and cik_resume().
4814 * Returns 0 for success, error for failure.
4815 */
4816static int cik_startup(struct radeon_device *rdev)
4817{
4818 struct radeon_ring *ring;
4819 int r;
4820
4821 if (rdev->flags & RADEON_IS_IGP) {
4822 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4823 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
4824 r = cik_init_microcode(rdev);
4825 if (r) {
4826 DRM_ERROR("Failed to load firmware!\n");
4827 return r;
4828 }
4829 }
4830 } else {
4831 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4832 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
4833 !rdev->mc_fw) {
4834 r = cik_init_microcode(rdev);
4835 if (r) {
4836 DRM_ERROR("Failed to load firmware!\n");
4837 return r;
4838 }
4839 }
4840
4841 r = ci_mc_load_microcode(rdev);
4842 if (r) {
4843 DRM_ERROR("Failed to load MC firmware!\n");
4844 return r;
4845 }
4846 }
4847
4848 r = r600_vram_scratch_init(rdev);
4849 if (r)
4850 return r;
4851
4852 cik_mc_program(rdev);
4853 r = cik_pcie_gart_enable(rdev);
4854 if (r)
4855 return r;
4856 cik_gpu_init(rdev);
4857
4858 /* allocate rlc buffers */
4859 r = si_rlc_init(rdev);
4860 if (r) {
4861 DRM_ERROR("Failed to init rlc BOs!\n");
4862 return r;
4863 }
4864
4865 /* allocate wb buffer */
4866 r = radeon_wb_init(rdev);
4867 if (r)
4868 return r;
4869
4870 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4871 if (r) {
4872 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4873 return r;
4874 }
4875
4876 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4877 if (r) {
4878 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4879 return r;
4880 }
4881
4882 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4883 if (r) {
4884 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4885 return r;
4886 }
4887
4888 /* Enable IRQ */
4889 if (!rdev->irq.installed) {
4890 r = radeon_irq_kms_init(rdev);
4891 if (r)
4892 return r;
4893 }
4894
4895 r = cik_irq_init(rdev);
4896 if (r) {
4897 DRM_ERROR("radeon: IH init failed (%d).\n", r);
4898 radeon_irq_kms_fini(rdev);
4899 return r;
4900 }
4901 cik_irq_set(rdev);
4902
4903 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4904 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
4905 CP_RB0_RPTR, CP_RB0_WPTR,
4906 0, 0xfffff, RADEON_CP_PACKET2);
4907 if (r)
4908 return r;
4909
4910 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4911 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4912 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
4913 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
4914 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4915 if (r)
4916 return r;
4917
4918 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4919 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4920 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
4921 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
4922 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4923 if (r)
4924 return r;
4925
4926 r = cik_cp_resume(rdev);
4927 if (r)
4928 return r;
4929
4930 r = cik_sdma_resume(rdev);
4931 if (r)
4932 return r;
4933
4934 r = radeon_ib_pool_init(rdev);
4935 if (r) {
4936 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
4937 return r;
4938 }
4939
4940 r = radeon_vm_manager_init(rdev);
4941 if (r) {
4942 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
4943 return r;
4944 }
4945
4946 return 0;
4947}
4948
4949/**
4950 * cik_resume - resume the asic to a functional state
4951 *
4952 * @rdev: radeon_device pointer
4953 *
4954 * Programs the asic to a functional state (CIK).
4955 * Called at resume.
4956 * Returns 0 for success, error for failure.
4957 */
4958int cik_resume(struct radeon_device *rdev)
4959{
4960 int r;
4961
4962 /* post card */
4963 atom_asic_init(rdev->mode_info.atom_context);
4964
4965 rdev->accel_working = true;
4966 r = cik_startup(rdev);
4967 if (r) {
4968 DRM_ERROR("cik startup failed on resume\n");
4969 rdev->accel_working = false;
4970 return r;
4971 }
4972
4973 return r;
4974
4975}
4976
4977/**
4978 * cik_suspend - suspend the asic
4979 *
4980 * @rdev: radeon_device pointer
4981 *
4982 * Bring the chip into a state suitable for suspend (CIK).
4983 * Called at suspend.
4984 * Returns 0 for success.
4985 */
4986int cik_suspend(struct radeon_device *rdev)
4987{
4988 radeon_vm_manager_fini(rdev);
4989 cik_cp_enable(rdev, false);
4990 cik_sdma_enable(rdev, false);
4991 cik_irq_suspend(rdev);
4992 radeon_wb_disable(rdev);
4993 cik_pcie_gart_disable(rdev);
4994 return 0;
4995}
4996
4997/* Plan is to move initialization in that function and use
4998 * helper function so that radeon_device_init pretty much
4999 * do nothing more than calling asic specific function. This
5000 * should also allow to remove a bunch of callback function
5001 * like vram_info.
5002 */
5003/**
5004 * cik_init - asic specific driver and hw init
5005 *
5006 * @rdev: radeon_device pointer
5007 *
5008 * Setup asic specific driver variables and program the hw
5009 * to a functional state (CIK).
5010 * Called at driver startup.
5011 * Returns 0 for success, errors for failure.
5012 */
5013int cik_init(struct radeon_device *rdev)
5014{
5015 struct radeon_ring *ring;
5016 int r;
5017
5018 /* Read BIOS */
5019 if (!radeon_get_bios(rdev)) {
5020 if (ASIC_IS_AVIVO(rdev))
5021 return -EINVAL;
5022 }
5023 /* Must be an ATOMBIOS */
5024 if (!rdev->is_atom_bios) {
5025 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5026 return -EINVAL;
5027 }
5028 r = radeon_atombios_init(rdev);
5029 if (r)
5030 return r;
5031
5032 /* Post card if necessary */
5033 if (!radeon_card_posted(rdev)) {
5034 if (!rdev->bios) {
5035 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5036 return -EINVAL;
5037 }
5038 DRM_INFO("GPU not posted. posting now...\n");
5039 atom_asic_init(rdev->mode_info.atom_context);
5040 }
5041 /* Initialize scratch registers */
5042 cik_scratch_init(rdev);
5043 /* Initialize surface registers */
5044 radeon_surface_init(rdev);
5045 /* Initialize clocks */
5046 radeon_get_clock_info(rdev->ddev);
5047
5048 /* Fence driver */
5049 r = radeon_fence_driver_init(rdev);
5050 if (r)
5051 return r;
5052
5053 /* initialize memory controller */
5054 r = cik_mc_init(rdev);
5055 if (r)
5056 return r;
5057 /* Memory manager */
5058 r = radeon_bo_init(rdev);
5059 if (r)
5060 return r;
5061
5062 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5063 ring->ring_obj = NULL;
5064 r600_ring_init(rdev, ring, 1024 * 1024);
5065
5066 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5067 ring->ring_obj = NULL;
5068 r600_ring_init(rdev, ring, 256 * 1024);
5069
5070 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5071 ring->ring_obj = NULL;
5072 r600_ring_init(rdev, ring, 256 * 1024);
5073
5074 rdev->ih.ring_obj = NULL;
5075 r600_ih_ring_init(rdev, 64 * 1024);
5076
5077 r = r600_pcie_gart_init(rdev);
5078 if (r)
5079 return r;
5080
5081 rdev->accel_working = true;
5082 r = cik_startup(rdev);
5083 if (r) {
5084 dev_err(rdev->dev, "disabling GPU acceleration\n");
5085 cik_cp_fini(rdev);
5086 cik_sdma_fini(rdev);
5087 cik_irq_fini(rdev);
5088 si_rlc_fini(rdev);
5089 radeon_wb_fini(rdev);
5090 radeon_ib_pool_fini(rdev);
5091 radeon_vm_manager_fini(rdev);
5092 radeon_irq_kms_fini(rdev);
5093 cik_pcie_gart_fini(rdev);
5094 rdev->accel_working = false;
5095 }
5096
5097 /* Don't start up if the MC ucode is missing.
5098 * The default clocks and voltages before the MC ucode
5099 * is loaded are not suffient for advanced operations.
5100 */
5101 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
5102 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5103 return -EINVAL;
5104 }
5105
5106 return 0;
5107}
5108
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 * The teardown helpers are called in a fixed order and none of them
 * returns a status that is checked here.
 */
void cik_fini(struct radeon_device *rdev)
{
	/* CP, SDMA, interrupts and RLC (the RLC uses the si_ helper) */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* free the BIOS image and clear the now stale pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
cd84a27d
AD
5137
5138/* display watermark setup */
5139/**
5140 * dce8_line_buffer_adjust - Set up the line buffer
5141 *
5142 * @rdev: radeon_device pointer
5143 * @radeon_crtc: the selected display controller
5144 * @mode: the current display mode on the selected display
5145 * controller
5146 *
5147 * Setup up the line buffer allocation for
5148 * the selected display controller (CIK).
5149 * Returns the line buffer size in pixels.
5150 */
5151static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5152 struct radeon_crtc *radeon_crtc,
5153 struct drm_display_mode *mode)
5154{
5155 u32 tmp;
5156
5157 /*
5158 * Line Buffer Setup
5159 * There are 6 line buffers, one for each display controllers.
5160 * There are 3 partitions per LB. Select the number of partitions
5161 * to enable based on the display width. For display widths larger
5162 * than 4096, you need use to use 2 display controllers and combine
5163 * them using the stereo blender.
5164 */
5165 if (radeon_crtc->base.enabled && mode) {
5166 if (mode->crtc_hdisplay < 1920)
5167 tmp = 1;
5168 else if (mode->crtc_hdisplay < 2560)
5169 tmp = 2;
5170 else if (mode->crtc_hdisplay < 4096)
5171 tmp = 0;
5172 else {
5173 DRM_DEBUG_KMS("Mode too big for LB!\n");
5174 tmp = 0;
5175 }
5176 } else
5177 tmp = 1;
5178
5179 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5180 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5181
5182 if (radeon_crtc->base.enabled && mode) {
5183 switch (tmp) {
5184 case 0:
5185 default:
5186 return 4096 * 2;
5187 case 1:
5188 return 1920 * 2;
5189 case 2:
5190 return 2560 * 2;
5191 }
5192 }
5193
5194 /* controller not enabled, so no lb used */
5195 return 0;
5196}
5197
5198/**
5199 * cik_get_number_of_dram_channels - get the number of dram channels
5200 *
5201 * @rdev: radeon_device pointer
5202 *
5203 * Look up the number of video ram channels (CIK).
5204 * Used for display watermark bandwidth calculations
5205 * Returns the number of dram channels
5206 */
5207static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5208{
5209 u32 tmp = RREG32(MC_SHARED_CHMAP);
5210
5211 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5212 case 0:
5213 default:
5214 return 1;
5215 case 1:
5216 return 2;
5217 case 2:
5218 return 4;
5219 case 3:
5220 return 8;
5221 case 4:
5222 return 3;
5223 case 5:
5224 return 6;
5225 case 6:
5226 return 10;
5227 case 7:
5228 return 12;
5229 case 8:
5230 return 16;
5231 }
5232}
5233
/*
 * Input parameters for the DCE8 display watermark calculations.
 * Filled in by dce8_program_watermarks() and consumed read-only by
 * the dce8_*_bandwidth(), dce8_latency_watermark() and
 * dce8_check_latency_hiding() helpers.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
5249
5250/**
5251 * dce8_dram_bandwidth - get the dram bandwidth
5252 *
5253 * @wm: watermark calculation data
5254 *
5255 * Calculate the raw dram bandwidth (CIK).
5256 * Used for display watermark bandwidth calculations
5257 * Returns the dram bandwidth in MBytes/s
5258 */
5259static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
5260{
5261 /* Calculate raw DRAM Bandwidth */
5262 fixed20_12 dram_efficiency; /* 0.7 */
5263 fixed20_12 yclk, dram_channels, bandwidth;
5264 fixed20_12 a;
5265
5266 a.full = dfixed_const(1000);
5267 yclk.full = dfixed_const(wm->yclk);
5268 yclk.full = dfixed_div(yclk, a);
5269 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5270 a.full = dfixed_const(10);
5271 dram_efficiency.full = dfixed_const(7);
5272 dram_efficiency.full = dfixed_div(dram_efficiency, a);
5273 bandwidth.full = dfixed_mul(dram_channels, yclk);
5274 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
5275
5276 return dfixed_trunc(bandwidth);
5277}
5278
5279/**
5280 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
5281 *
5282 * @wm: watermark calculation data
5283 *
5284 * Calculate the dram bandwidth used for display (CIK).
5285 * Used for display watermark bandwidth calculations
5286 * Returns the dram bandwidth for display in MBytes/s
5287 */
5288static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5289{
5290 /* Calculate DRAM Bandwidth and the part allocated to display. */
5291 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
5292 fixed20_12 yclk, dram_channels, bandwidth;
5293 fixed20_12 a;
5294
5295 a.full = dfixed_const(1000);
5296 yclk.full = dfixed_const(wm->yclk);
5297 yclk.full = dfixed_div(yclk, a);
5298 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5299 a.full = dfixed_const(10);
5300 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
5301 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
5302 bandwidth.full = dfixed_mul(dram_channels, yclk);
5303 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
5304
5305 return dfixed_trunc(bandwidth);
5306}
5307
5308/**
5309 * dce8_data_return_bandwidth - get the data return bandwidth
5310 *
5311 * @wm: watermark calculation data
5312 *
5313 * Calculate the data return bandwidth used for display (CIK).
5314 * Used for display watermark bandwidth calculations
5315 * Returns the data return bandwidth in MBytes/s
5316 */
5317static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
5318{
5319 /* Calculate the display Data return Bandwidth */
5320 fixed20_12 return_efficiency; /* 0.8 */
5321 fixed20_12 sclk, bandwidth;
5322 fixed20_12 a;
5323
5324 a.full = dfixed_const(1000);
5325 sclk.full = dfixed_const(wm->sclk);
5326 sclk.full = dfixed_div(sclk, a);
5327 a.full = dfixed_const(10);
5328 return_efficiency.full = dfixed_const(8);
5329 return_efficiency.full = dfixed_div(return_efficiency, a);
5330 a.full = dfixed_const(32);
5331 bandwidth.full = dfixed_mul(a, sclk);
5332 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
5333
5334 return dfixed_trunc(bandwidth);
5335}
5336
5337/**
5338 * dce8_dmif_request_bandwidth - get the dmif bandwidth
5339 *
5340 * @wm: watermark calculation data
5341 *
5342 * Calculate the dmif bandwidth used for display (CIK).
5343 * Used for display watermark bandwidth calculations
5344 * Returns the dmif bandwidth in MBytes/s
5345 */
5346static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
5347{
5348 /* Calculate the DMIF Request Bandwidth */
5349 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
5350 fixed20_12 disp_clk, bandwidth;
5351 fixed20_12 a, b;
5352
5353 a.full = dfixed_const(1000);
5354 disp_clk.full = dfixed_const(wm->disp_clk);
5355 disp_clk.full = dfixed_div(disp_clk, a);
5356 a.full = dfixed_const(32);
5357 b.full = dfixed_mul(a, disp_clk);
5358
5359 a.full = dfixed_const(10);
5360 disp_clk_request_efficiency.full = dfixed_const(8);
5361 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
5362
5363 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
5364
5365 return dfixed_trunc(bandwidth);
5366}
5367
5368/**
5369 * dce8_available_bandwidth - get the min available bandwidth
5370 *
5371 * @wm: watermark calculation data
5372 *
5373 * Calculate the min available bandwidth used for display (CIK).
5374 * Used for display watermark bandwidth calculations
5375 * Returns the min available bandwidth in MBytes/s
5376 */
5377static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
5378{
5379 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
5380 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
5381 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
5382 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
5383
5384 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
5385}
5386
5387/**
5388 * dce8_average_bandwidth - get the average available bandwidth
5389 *
5390 * @wm: watermark calculation data
5391 *
5392 * Calculate the average available bandwidth used for display (CIK).
5393 * Used for display watermark bandwidth calculations
5394 * Returns the average available bandwidth in MBytes/s
5395 */
5396static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
5397{
5398 /* Calculate the display mode Average Bandwidth
5399 * DisplayMode should contain the source and destination dimensions,
5400 * timing, etc.
5401 */
5402 fixed20_12 bpp;
5403 fixed20_12 line_time;
5404 fixed20_12 src_width;
5405 fixed20_12 bandwidth;
5406 fixed20_12 a;
5407
5408 a.full = dfixed_const(1000);
5409 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
5410 line_time.full = dfixed_div(line_time, a);
5411 bpp.full = dfixed_const(wm->bytes_per_pixel);
5412 src_width.full = dfixed_const(wm->src_width);
5413 bandwidth.full = dfixed_mul(src_width, bpp);
5414 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
5415 bandwidth.full = dfixed_div(bandwidth, line_time);
5416
5417 return dfixed_trunc(bandwidth);
5418}
5419
5420/**
5421 * dce8_latency_watermark - get the latency watermark
5422 *
5423 * @wm: watermark calculation data
5424 *
5425 * Calculate the latency watermark (CIK).
5426 * Used for display watermark bandwidth calculations
5427 * Returns the latency watermark in ns
5428 */
5429static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
5430{
5431 /* First calculate the latency in ns */
5432 u32 mc_latency = 2000; /* 2000 ns. */
5433 u32 available_bandwidth = dce8_available_bandwidth(wm);
5434 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
5435 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
5436 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
5437 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
5438 (wm->num_heads * cursor_line_pair_return_time);
5439 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
5440 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
5441 u32 tmp, dmif_size = 12288;
5442 fixed20_12 a, b, c;
5443
5444 if (wm->num_heads == 0)
5445 return 0;
5446
5447 a.full = dfixed_const(2);
5448 b.full = dfixed_const(1);
5449 if ((wm->vsc.full > a.full) ||
5450 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
5451 (wm->vtaps >= 5) ||
5452 ((wm->vsc.full >= a.full) && wm->interlaced))
5453 max_src_lines_per_dst_line = 4;
5454 else
5455 max_src_lines_per_dst_line = 2;
5456
5457 a.full = dfixed_const(available_bandwidth);
5458 b.full = dfixed_const(wm->num_heads);
5459 a.full = dfixed_div(a, b);
5460
5461 b.full = dfixed_const(mc_latency + 512);
5462 c.full = dfixed_const(wm->disp_clk);
5463 b.full = dfixed_div(b, c);
5464
5465 c.full = dfixed_const(dmif_size);
5466 b.full = dfixed_div(c, b);
5467
5468 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
5469
5470 b.full = dfixed_const(1000);
5471 c.full = dfixed_const(wm->disp_clk);
5472 b.full = dfixed_div(c, b);
5473 c.full = dfixed_const(wm->bytes_per_pixel);
5474 b.full = dfixed_mul(b, c);
5475
5476 lb_fill_bw = min(tmp, dfixed_trunc(b));
5477
5478 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
5479 b.full = dfixed_const(1000);
5480 c.full = dfixed_const(lb_fill_bw);
5481 b.full = dfixed_div(c, b);
5482 a.full = dfixed_div(a, b);
5483 line_fill_time = dfixed_trunc(a);
5484
5485 if (line_fill_time < wm->active_time)
5486 return latency;
5487 else
5488 return latency + (line_fill_time - wm->active_time);
5489
5490}
5491
5492/**
5493 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
5494 * average and available dram bandwidth
5495 *
5496 * @wm: watermark calculation data
5497 *
5498 * Check if the display average bandwidth fits in the display
5499 * dram bandwidth (CIK).
5500 * Used for display watermark bandwidth calculations
5501 * Returns true if the display fits, false if not.
5502 */
5503static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5504{
5505 if (dce8_average_bandwidth(wm) <=
5506 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
5507 return true;
5508 else
5509 return false;
5510}
5511
5512/**
5513 * dce8_average_bandwidth_vs_available_bandwidth - check
5514 * average and available bandwidth
5515 *
5516 * @wm: watermark calculation data
5517 *
5518 * Check if the display average bandwidth fits in the display
5519 * available bandwidth (CIK).
5520 * Used for display watermark bandwidth calculations
5521 * Returns true if the display fits, false if not.
5522 */
5523static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
5524{
5525 if (dce8_average_bandwidth(wm) <=
5526 (dce8_available_bandwidth(wm) / wm->num_heads))
5527 return true;
5528 else
5529 return false;
5530}
5531
5532/**
5533 * dce8_check_latency_hiding - check latency hiding
5534 *
5535 * @wm: watermark calculation data
5536 *
5537 * Check latency hiding (CIK).
5538 * Used for display watermark bandwidth calculations
5539 * Returns true if the display fits, false if not.
5540 */
5541static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
5542{
5543 u32 lb_partitions = wm->lb_size / wm->src_width;
5544 u32 line_time = wm->active_time + wm->blank_time;
5545 u32 latency_tolerant_lines;
5546 u32 latency_hiding;
5547 fixed20_12 a;
5548
5549 a.full = dfixed_const(1);
5550 if (wm->vsc.full > a.full)
5551 latency_tolerant_lines = 1;
5552 else {
5553 if (lb_partitions <= (wm->vtaps + 1))
5554 latency_tolerant_lines = 1;
5555 else
5556 latency_tolerant_lines = 2;
5557 }
5558
5559 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
5560
5561 if (dce8_latency_watermark(wm) <= latency_hiding)
5562 return true;
5563 else
5564 return false;
5565}
5566
5567/**
5568 * dce8_program_watermarks - program display watermarks
5569 *
5570 * @rdev: radeon_device pointer
5571 * @radeon_crtc: the selected display controller
5572 * @lb_size: line buffer size
5573 * @num_heads: number of display controllers in use
5574 *
5575 * Calculate and program the display watermarks for the
5576 * selected display controller (CIK).
5577 */
5578static void dce8_program_watermarks(struct radeon_device *rdev,
5579 struct radeon_crtc *radeon_crtc,
5580 u32 lb_size, u32 num_heads)
5581{
5582 struct drm_display_mode *mode = &radeon_crtc->base.mode;
5583 struct dce8_wm_params wm;
5584 u32 pixel_period;
5585 u32 line_time = 0;
5586 u32 latency_watermark_a = 0, latency_watermark_b = 0;
5587 u32 tmp, wm_mask;
5588
5589 if (radeon_crtc->base.enabled && num_heads && mode) {
5590 pixel_period = 1000000 / (u32)mode->clock;
5591 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
5592
5593 wm.yclk = rdev->pm.current_mclk * 10;
5594 wm.sclk = rdev->pm.current_sclk * 10;
5595 wm.disp_clk = mode->clock;
5596 wm.src_width = mode->crtc_hdisplay;
5597 wm.active_time = mode->crtc_hdisplay * pixel_period;
5598 wm.blank_time = line_time - wm.active_time;
5599 wm.interlaced = false;
5600 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
5601 wm.interlaced = true;
5602 wm.vsc = radeon_crtc->vsc;
5603 wm.vtaps = 1;
5604 if (radeon_crtc->rmx_type != RMX_OFF)
5605 wm.vtaps = 2;
5606 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
5607 wm.lb_size = lb_size;
5608 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
5609 wm.num_heads = num_heads;
5610
5611 /* set for high clocks */
5612 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
5613 /* set for low clocks */
5614 /* wm.yclk = low clk; wm.sclk = low clk */
5615 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
5616
5617 /* possibly force display priority to high */
5618 /* should really do this at mode validation time... */
5619 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
5620 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
5621 !dce8_check_latency_hiding(&wm) ||
5622 (rdev->disp_priority == 2)) {
5623 DRM_DEBUG_KMS("force priority to high\n");
5624 }
5625 }
5626
5627 /* select wm A */
5628 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5629 tmp = wm_mask;
5630 tmp &= ~LATENCY_WATERMARK_MASK(3);
5631 tmp |= LATENCY_WATERMARK_MASK(1);
5632 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5633 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5634 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
5635 LATENCY_HIGH_WATERMARK(line_time)));
5636 /* select wm B */
5637 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5638 tmp &= ~LATENCY_WATERMARK_MASK(3);
5639 tmp |= LATENCY_WATERMARK_MASK(2);
5640 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5641 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5642 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
5643 LATENCY_HIGH_WATERMARK(line_time)));
5644 /* restore original selection */
5645 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
5646}
5647
5648/**
5649 * dce8_bandwidth_update - program display watermarks
5650 *
5651 * @rdev: radeon_device pointer
5652 *
5653 * Calculate and program the display watermarks and line
5654 * buffer allocation (CIK).
5655 */
5656void dce8_bandwidth_update(struct radeon_device *rdev)
5657{
5658 struct drm_display_mode *mode = NULL;
5659 u32 num_heads = 0, lb_size;
5660 int i;
5661
5662 radeon_update_display_priority(rdev);
5663
5664 for (i = 0; i < rdev->num_crtc; i++) {
5665 if (rdev->mode_info.crtcs[i]->base.enabled)
5666 num_heads++;
5667 }
5668 for (i = 0; i < rdev->num_crtc; i++) {
5669 mode = &rdev->mode_info.crtcs[i]->base.mode;
5670 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
5671 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
5672 }
5673}
44fa346f
AD
5674
5675/**
5676 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
5677 *
5678 * @rdev: radeon_device pointer
5679 *
5680 * Fetches a GPU clock counter snapshot (SI).
5681 * Returns the 64 bit clock counter snapshot.
5682 */
5683uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
5684{
5685 uint64_t clock;
5686
5687 mutex_lock(&rdev->gpu_clock_mutex);
5688 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5689 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5690 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5691 mutex_unlock(&rdev->gpu_clock_mutex);
5692 return clock;
5693}
5694