drm/radeon: add UVD support for CIK (v3)
drivers/gpu/drm/radeon/cik.c
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/module.h>
28#include "drmP.h"
29#include "radeon.h"
30#include "radeon_asic.h"
31#include "cikd.h"
32#include "atom.h"
33#include "cik_blit_shaders.h"
34
35/* GFX */
36#define CIK_PFP_UCODE_SIZE 2144
37#define CIK_ME_UCODE_SIZE 2144
38#define CIK_CE_UCODE_SIZE 2144
39/* compute */
40#define CIK_MEC_UCODE_SIZE 4192
41/* interrupts */
42#define BONAIRE_RLC_UCODE_SIZE 2048
43#define KB_RLC_UCODE_SIZE 2560
44#define KV_RLC_UCODE_SIZE 2560
45/* gddr controller */
46#define CIK_MC_UCODE_SIZE 7866
47/* sdma */
48#define CIK_SDMA_UCODE_SIZE 1050
49#define CIK_SDMA_UCODE_VERSION 64
50
51MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
57MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
58MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
59MODULE_FIRMWARE("radeon/KAVERI_me.bin");
60MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
61MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
62MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
63MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
64MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
65MODULE_FIRMWARE("radeon/KABINI_me.bin");
66MODULE_FIRMWARE("radeon/KABINI_ce.bin");
67MODULE_FIRMWARE("radeon/KABINI_mec.bin");
68MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
69MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
70
71extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72extern void r600_ih_ring_fini(struct radeon_device *rdev);
73extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
74extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
75extern bool evergreen_is_display_hung(struct radeon_device *rdev);
76extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
77extern void si_rlc_fini(struct radeon_device *rdev);
78extern int si_rlc_init(struct radeon_device *rdev);
79static void cik_rlc_stop(struct radeon_device *rdev);
80
81/*
82 * Indirect registers accessor
83 */
84u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
85{
86 u32 r;
87
88 WREG32(PCIE_INDEX, reg);
89 (void)RREG32(PCIE_INDEX);
90 r = RREG32(PCIE_DATA);
91 return r;
92}
93
94void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
95{
96 WREG32(PCIE_INDEX, reg);
97 (void)RREG32(PCIE_INDEX);
98 WREG32(PCIE_DATA, v);
99 (void)RREG32(PCIE_DATA);
100}
101
102/**
103 * cik_get_xclk - get the xclk
104 *
105 * @rdev: radeon_device pointer
106 *
107 * Returns the reference clock used by the gfx engine
108 * (CIK).
109 */
110u32 cik_get_xclk(struct radeon_device *rdev)
111{
112 u32 reference_clock = rdev->clock.spll.reference_freq;
113
114 if (rdev->flags & RADEON_IS_IGP) {
115 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
116 return reference_clock / 2;
117 } else {
118 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
119 return reference_clock / 4;
120 }
121 return reference_clock;
122}
123
124#define BONAIRE_IO_MC_REGS_SIZE 36
125
126static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
127{
128 {0x00000070, 0x04400000},
129 {0x00000071, 0x80c01803},
130 {0x00000072, 0x00004004},
131 {0x00000073, 0x00000100},
132 {0x00000074, 0x00ff0000},
133 {0x00000075, 0x34000000},
134 {0x00000076, 0x08000014},
135 {0x00000077, 0x00cc08ec},
136 {0x00000078, 0x00000400},
137 {0x00000079, 0x00000000},
138 {0x0000007a, 0x04090000},
139 {0x0000007c, 0x00000000},
140 {0x0000007e, 0x4408a8e8},
141 {0x0000007f, 0x00000304},
142 {0x00000080, 0x00000000},
143 {0x00000082, 0x00000001},
144 {0x00000083, 0x00000002},
145 {0x00000084, 0xf3e4f400},
146 {0x00000085, 0x052024e3},
147 {0x00000087, 0x00000000},
148 {0x00000088, 0x01000000},
149 {0x0000008a, 0x1c0a0000},
150 {0x0000008b, 0xff010000},
151 {0x0000008d, 0xffffefff},
152 {0x0000008e, 0xfff3efff},
153 {0x0000008f, 0xfff3efbf},
154 {0x00000092, 0xf7ffffff},
155 {0x00000093, 0xffffff7f},
156 {0x00000095, 0x00101101},
157 {0x00000096, 0x00000fff},
158 {0x00000097, 0x00116fff},
159 {0x00000098, 0x60010000},
160 {0x00000099, 0x10010000},
161 {0x0000009a, 0x00006000},
162 {0x0000009b, 0x00001000},
163 {0x0000009f, 0x00b48000}
164};
165
166/* ucode loading */
167/**
168 * ci_mc_load_microcode - load MC ucode into the hw
169 *
170 * @rdev: radeon_device pointer
171 *
172 * Load the GDDR MC ucode into the hw (CIK).
173 * Returns 0 on success, error on failure.
174 */
175static int ci_mc_load_microcode(struct radeon_device *rdev)
176{
177 const __be32 *fw_data;
178 u32 running, blackout = 0;
179 u32 *io_mc_regs;
180 int i, ucode_size, regs_size;
181
182 if (!rdev->mc_fw)
183 return -EINVAL;
184
185 switch (rdev->family) {
186 case CHIP_BONAIRE:
187 default:
188 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
189 ucode_size = CIK_MC_UCODE_SIZE;
190 regs_size = BONAIRE_IO_MC_REGS_SIZE;
191 break;
192 }
193
194 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
195
196 if (running == 0) {
197 if (running) {
198 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
199 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
200 }
201
202 /* reset the engine and set to writable */
203 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
204 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
205
206 /* load mc io regs */
207 for (i = 0; i < regs_size; i++) {
208 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
209 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
210 }
211 /* load the MC ucode */
212 fw_data = (const __be32 *)rdev->mc_fw->data;
213 for (i = 0; i < ucode_size; i++)
214 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
215
216 /* put the engine back into the active state */
217 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
218 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
219 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
220
221 /* wait for training to complete */
222 for (i = 0; i < rdev->usec_timeout; i++) {
223 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
224 break;
225 udelay(1);
226 }
227 for (i = 0; i < rdev->usec_timeout; i++) {
228 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
229 break;
230 udelay(1);
231 }
232
233 if (running)
234 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
235 }
236
237 return 0;
238}
239
240/**
241 * cik_init_microcode - load ucode images from disk
242 *
243 * @rdev: radeon_device pointer
244 *
245 * Use the firmware interface to load the ucode images into
246 * the driver (not loaded into hw).
247 * Returns 0 on success, error on failure.
248 */
249static int cik_init_microcode(struct radeon_device *rdev)
250{
251 struct platform_device *pdev;
252 const char *chip_name;
253 size_t pfp_req_size, me_req_size, ce_req_size,
254 mec_req_size, rlc_req_size, mc_req_size,
255 sdma_req_size;
256 char fw_name[30];
257 int err;
258
259 DRM_DEBUG("\n");
260
261 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
262 err = IS_ERR(pdev);
263 if (err) {
264 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
265 return -EINVAL;
266 }
267
268 switch (rdev->family) {
269 case CHIP_BONAIRE:
270 chip_name = "BONAIRE";
271 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
272 me_req_size = CIK_ME_UCODE_SIZE * 4;
273 ce_req_size = CIK_CE_UCODE_SIZE * 4;
274 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
275 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
276 mc_req_size = CIK_MC_UCODE_SIZE * 4;
277 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
278 break;
279 case CHIP_KAVERI:
280 chip_name = "KAVERI";
281 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
282 me_req_size = CIK_ME_UCODE_SIZE * 4;
283 ce_req_size = CIK_CE_UCODE_SIZE * 4;
284 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
285 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
286 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
287 break;
288 case CHIP_KABINI:
289 chip_name = "KABINI";
290 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
291 me_req_size = CIK_ME_UCODE_SIZE * 4;
292 ce_req_size = CIK_CE_UCODE_SIZE * 4;
293 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
294 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
295 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
296 break;
297 default: BUG();
298 }
299
300 DRM_INFO("Loading %s Microcode\n", chip_name);
301
302 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
303 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
304 if (err)
305 goto out;
306 if (rdev->pfp_fw->size != pfp_req_size) {
307 printk(KERN_ERR
308 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
309 rdev->pfp_fw->size, fw_name);
310 err = -EINVAL;
311 goto out;
312 }
313
314 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
315 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
316 if (err)
317 goto out;
318 if (rdev->me_fw->size != me_req_size) {
319 printk(KERN_ERR
320 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
321 rdev->me_fw->size, fw_name);
322 err = -EINVAL;
323 }
324
325 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
326 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
327 if (err)
328 goto out;
329 if (rdev->ce_fw->size != ce_req_size) {
330 printk(KERN_ERR
331 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
332 rdev->ce_fw->size, fw_name);
333 err = -EINVAL;
334 }
335
336 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
337 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
338 if (err)
339 goto out;
340 if (rdev->mec_fw->size != mec_req_size) {
341 printk(KERN_ERR
342 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
343 rdev->mec_fw->size, fw_name);
344 err = -EINVAL;
345 }
346
347 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
348 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
349 if (err)
350 goto out;
351 if (rdev->rlc_fw->size != rlc_req_size) {
352 printk(KERN_ERR
353 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
354 rdev->rlc_fw->size, fw_name);
355 err = -EINVAL;
356 }
357
358 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
359 err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
360 if (err)
361 goto out;
362 if (rdev->sdma_fw->size != sdma_req_size) {
363 printk(KERN_ERR
364 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
365 rdev->sdma_fw->size, fw_name);
366 err = -EINVAL;
367 }
368
369 /* No MC ucode on APUs */
370 if (!(rdev->flags & RADEON_IS_IGP)) {
371 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
372 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
373 if (err)
374 goto out;
375 if (rdev->mc_fw->size != mc_req_size) {
376 printk(KERN_ERR
377 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
378 rdev->mc_fw->size, fw_name);
379 err = -EINVAL;
380 }
381 }
382
383out:
384 platform_device_unregister(pdev);
385
386 if (err) {
387 if (err != -EINVAL)
388 printk(KERN_ERR
389 "cik_cp: Failed to load firmware \"%s\"\n",
390 fw_name);
391 release_firmware(rdev->pfp_fw);
392 rdev->pfp_fw = NULL;
393 release_firmware(rdev->me_fw);
394 rdev->me_fw = NULL;
395 release_firmware(rdev->ce_fw);
396 rdev->ce_fw = NULL;
397 release_firmware(rdev->rlc_fw);
398 rdev->rlc_fw = NULL;
399 release_firmware(rdev->mc_fw);
400 rdev->mc_fw = NULL;
401 }
402 return err;
403}
404
405/*
406 * Core functions
407 */
408/**
409 * cik_tiling_mode_table_init - init the hw tiling table
410 *
411 * @rdev: radeon_device pointer
412 *
413 * Starting with SI, the tiling setup is done globally in a
414 * set of 32 tiling modes. Rather than selecting each set of
415 * parameters per surface as on older asics, we just select
416 * which index in the tiling table we want to use, and the
417 * surface uses those parameters (CIK).
418 */
419static void cik_tiling_mode_table_init(struct radeon_device *rdev)
420{
421 const u32 num_tile_mode_states = 32;
422 const u32 num_secondary_tile_mode_states = 16;
423 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
424 u32 num_pipe_configs;
425 u32 num_rbs = rdev->config.cik.max_backends_per_se *
426 rdev->config.cik.max_shader_engines;
427
428 switch (rdev->config.cik.mem_row_size_in_kb) {
429 case 1:
430 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
431 break;
432 case 2:
433 default:
434 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
435 break;
436 case 4:
437 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
438 break;
439 }
440
441 num_pipe_configs = rdev->config.cik.max_tile_pipes;
442 if (num_pipe_configs > 8)
443 num_pipe_configs = 8; /* ??? */
444
445 if (num_pipe_configs == 8) {
446 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
447 switch (reg_offset) {
448 case 0:
449 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
450 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
451 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
453 break;
454 case 1:
455 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
456 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
457 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
458 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
459 break;
460 case 2:
461 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
462 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
463 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
464 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
465 break;
466 case 3:
467 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
468 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
469 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
470 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
471 break;
472 case 4:
473 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
474 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
475 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
476 TILE_SPLIT(split_equal_to_row_size));
477 break;
478 case 5:
479 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
480 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
481 break;
482 case 6:
483 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
484 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
485 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
486 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
487 break;
488 case 7:
489 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
490 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
491 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
492 TILE_SPLIT(split_equal_to_row_size));
493 break;
494 case 8:
495 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
496 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
497 break;
498 case 9:
499 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
500 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
501 break;
502 case 10:
503 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
504 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
505 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
506 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
507 break;
508 case 11:
509 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
510 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
511 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
512 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
513 break;
514 case 12:
515 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
516 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
517 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
518 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
519 break;
520 case 13:
521 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
522 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
523 break;
524 case 14:
525 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
526 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
527 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
528 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
529 break;
530 case 16:
531 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
532 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
533 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
534 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
535 break;
536 case 17:
537 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
538 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
539 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
540 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
541 break;
542 case 27:
543 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
544 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
545 break;
546 case 28:
547 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
548 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
549 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
550 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
551 break;
552 case 29:
553 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
554 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
555 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
556 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
557 break;
558 case 30:
559 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
560 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
561 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
562 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
563 break;
564 default:
565 gb_tile_moden = 0;
566 break;
567 }
568 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
569 }
570 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
571 switch (reg_offset) {
572 case 0:
573 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
576 NUM_BANKS(ADDR_SURF_16_BANK));
577 break;
578 case 1:
579 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
580 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
581 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
582 NUM_BANKS(ADDR_SURF_16_BANK));
583 break;
584 case 2:
585 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
588 NUM_BANKS(ADDR_SURF_16_BANK));
589 break;
590 case 3:
591 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
592 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
593 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
594 NUM_BANKS(ADDR_SURF_16_BANK));
595 break;
596 case 4:
597 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
600 NUM_BANKS(ADDR_SURF_8_BANK));
601 break;
602 case 5:
603 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
604 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
605 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
606 NUM_BANKS(ADDR_SURF_4_BANK));
607 break;
608 case 6:
609 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
610 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
611 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
612 NUM_BANKS(ADDR_SURF_2_BANK));
613 break;
614 case 8:
615 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
616 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
617 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
618 NUM_BANKS(ADDR_SURF_16_BANK));
619 break;
620 case 9:
621 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
622 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
623 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
624 NUM_BANKS(ADDR_SURF_16_BANK));
625 break;
626 case 10:
627 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
628 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
629 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
630 NUM_BANKS(ADDR_SURF_16_BANK));
631 break;
632 case 11:
633 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
636 NUM_BANKS(ADDR_SURF_16_BANK));
637 break;
638 case 12:
639 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
642 NUM_BANKS(ADDR_SURF_8_BANK));
643 break;
644 case 13:
645 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
648 NUM_BANKS(ADDR_SURF_4_BANK));
649 break;
650 case 14:
651 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
652 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
653 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
654 NUM_BANKS(ADDR_SURF_2_BANK));
655 break;
656 default:
657 gb_tile_moden = 0;
658 break;
659 }
660 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
661 }
662 } else if (num_pipe_configs == 4) {
663 if (num_rbs == 4) {
664 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
665 switch (reg_offset) {
666 case 0:
667 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
668 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
669 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
670 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
671 break;
672 case 1:
673 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
674 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
675 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
676 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
677 break;
678 case 2:
679 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
680 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
681 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
682 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
683 break;
684 case 3:
685 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
686 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
687 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
688 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
689 break;
690 case 4:
691 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
692 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
693 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
694 TILE_SPLIT(split_equal_to_row_size));
695 break;
696 case 5:
697 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
698 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
699 break;
700 case 6:
701 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
702 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
703 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
704 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
705 break;
706 case 7:
707 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
708 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
709 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
710 TILE_SPLIT(split_equal_to_row_size));
711 break;
712 case 8:
713 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
714 PIPE_CONFIG(ADDR_SURF_P4_16x16));
715 break;
716 case 9:
717 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
718 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
719 break;
720 case 10:
721 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
722 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
723 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
725 break;
726 case 11:
727 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
728 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
729 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
730 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
731 break;
732 case 12:
733 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
734 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
735 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
737 break;
738 case 13:
739 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
740 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
741 break;
742 case 14:
743 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
744 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
745 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
746 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
747 break;
748 case 16:
749 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
750 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
751 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
753 break;
754 case 17:
755 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
756 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
757 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
758 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
759 break;
760 case 27:
761 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
762 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
763 break;
764 case 28:
765 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
766 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
767 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
768 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
769 break;
770 case 29:
771 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
772 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
773 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
774 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
775 break;
776 case 30:
777 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
778 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
779 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
781 break;
782 default:
783 gb_tile_moden = 0;
784 break;
785 }
786 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
787 }
788 } else if (num_rbs < 4) {
789 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
790 switch (reg_offset) {
791 case 0:
792 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
793 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
794 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
795 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
796 break;
797 case 1:
798 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
799 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
800 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
801 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
802 break;
803 case 2:
804 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
805 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
806 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
807 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
808 break;
809 case 3:
810 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
811 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
812 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
813 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
814 break;
815 case 4:
816 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
817 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
818 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
819 TILE_SPLIT(split_equal_to_row_size));
820 break;
821 case 5:
822 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
823 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
824 break;
825 case 6:
826 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
827 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
828 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
829 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
830 break;
831 case 7:
832 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
833 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
834 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
835 TILE_SPLIT(split_equal_to_row_size));
836 break;
837 case 8:
838 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
839 PIPE_CONFIG(ADDR_SURF_P4_8x16));
840 break;
841 case 9:
842 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
843 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
844 break;
845 case 10:
846 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
847 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
848 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
849 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
850 break;
851 case 11:
852 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
853 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
854 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
855 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
856 break;
857 case 12:
858 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
859 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
860 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
862 break;
863 case 13:
864 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
865 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
866 break;
867 case 14:
868 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
869 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
870 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
872 break;
873 case 16:
874 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
875 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
876 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
878 break;
879 case 17:
880 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
881 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
882 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
884 break;
885 case 27:
886 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
887 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
888 break;
889 case 28:
890 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
891 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
892 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
893 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
894 break;
895 case 29:
896 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
897 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
898 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
900 break;
901 case 30:
902 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
903 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
904 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
905 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
906 break;
907 default:
908 gb_tile_moden = 0;
909 break;
910 }
911 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
912 }
913 }
914 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
915 switch (reg_offset) {
916 case 0:
917 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
918 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
919 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
920 NUM_BANKS(ADDR_SURF_16_BANK));
921 break;
922 case 1:
923 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
926 NUM_BANKS(ADDR_SURF_16_BANK));
927 break;
928 case 2:
929 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
930 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
931 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
932 NUM_BANKS(ADDR_SURF_16_BANK));
933 break;
934 case 3:
935 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
936 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
937 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
938 NUM_BANKS(ADDR_SURF_16_BANK));
939 break;
940 case 4:
941 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
942 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
943 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
944 NUM_BANKS(ADDR_SURF_16_BANK));
945 break;
946 case 5:
947 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
948 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
949 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
950 NUM_BANKS(ADDR_SURF_8_BANK));
951 break;
952 case 6:
953 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
954 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
955 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
956 NUM_BANKS(ADDR_SURF_4_BANK));
957 break;
958 case 8:
959 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
960 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
961 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
962 NUM_BANKS(ADDR_SURF_16_BANK));
963 break;
964 case 9:
965 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
966 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
967 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
968 NUM_BANKS(ADDR_SURF_16_BANK));
969 break;
970 case 10:
971 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
972 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
973 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
974 NUM_BANKS(ADDR_SURF_16_BANK));
975 break;
976 case 11:
977 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
980 NUM_BANKS(ADDR_SURF_16_BANK));
981 break;
982 case 12:
983 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
986 NUM_BANKS(ADDR_SURF_16_BANK));
987 break;
988 case 13:
989 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
990 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
991 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
992 NUM_BANKS(ADDR_SURF_8_BANK));
993 break;
994 case 14:
995 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
996 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
997 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
998 NUM_BANKS(ADDR_SURF_4_BANK));
999 break;
1000 default:
1001 gb_tile_moden = 0;
1002 break;
1003 }
1004 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1005 }
1006 } else if (num_pipe_configs == 2) {
1007 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1008 switch (reg_offset) {
1009 case 0:
1010 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1011 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1012 PIPE_CONFIG(ADDR_SURF_P2) |
1013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1014 break;
1015 case 1:
1016 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1017 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1018 PIPE_CONFIG(ADDR_SURF_P2) |
1019 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1020 break;
1021 case 2:
1022 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1023 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1024 PIPE_CONFIG(ADDR_SURF_P2) |
1025 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1026 break;
1027 case 3:
1028 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1029 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1030 PIPE_CONFIG(ADDR_SURF_P2) |
1031 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1032 break;
1033 case 4:
1034 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1035 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1036 PIPE_CONFIG(ADDR_SURF_P2) |
1037 TILE_SPLIT(split_equal_to_row_size));
1038 break;
1039 case 5:
1040 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1041 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1042 break;
1043 case 6:
1044 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1045 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1046 PIPE_CONFIG(ADDR_SURF_P2) |
1047 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1048 break;
1049 case 7:
1050 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1051 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1052 PIPE_CONFIG(ADDR_SURF_P2) |
1053 TILE_SPLIT(split_equal_to_row_size));
1054 break;
1055 case 8:
1056 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1057 break;
1058 case 9:
1059 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1060 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1061 break;
1062 case 10:
1063 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1064 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1065 PIPE_CONFIG(ADDR_SURF_P2) |
1066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1067 break;
1068 case 11:
1069 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1070 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1071 PIPE_CONFIG(ADDR_SURF_P2) |
1072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1073 break;
1074 case 12:
1075 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1076 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1077 PIPE_CONFIG(ADDR_SURF_P2) |
1078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1079 break;
1080 case 13:
1081 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1082 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1083 break;
1084 case 14:
1085 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1086 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1087 PIPE_CONFIG(ADDR_SURF_P2) |
1088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1089 break;
1090 case 16:
1091 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1092 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1093 PIPE_CONFIG(ADDR_SURF_P2) |
1094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1095 break;
1096 case 17:
1097 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1098 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1099 PIPE_CONFIG(ADDR_SURF_P2) |
1100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1101 break;
1102 case 27:
1103 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1104 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1105 break;
1106 case 28:
1107 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1108 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1109 PIPE_CONFIG(ADDR_SURF_P2) |
1110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1111 break;
1112 case 29:
1113 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1114 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1115 PIPE_CONFIG(ADDR_SURF_P2) |
1116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1117 break;
1118 case 30:
1119 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1120 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1121 PIPE_CONFIG(ADDR_SURF_P2) |
1122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1123 break;
1124 default:
1125 gb_tile_moden = 0;
1126 break;
1127 }
1128 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1129 }
1130 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1131 switch (reg_offset) {
1132 case 0:
1133 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1134 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1135 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1136 NUM_BANKS(ADDR_SURF_16_BANK));
1137 break;
1138 case 1:
1139 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1140 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1141 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1142 NUM_BANKS(ADDR_SURF_16_BANK));
1143 break;
1144 case 2:
1145 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1146 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1147 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1148 NUM_BANKS(ADDR_SURF_16_BANK));
1149 break;
1150 case 3:
1151 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1154 NUM_BANKS(ADDR_SURF_16_BANK));
1155 break;
1156 case 4:
1157 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1158 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1159 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1160 NUM_BANKS(ADDR_SURF_16_BANK));
1161 break;
1162 case 5:
1163 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1166 NUM_BANKS(ADDR_SURF_16_BANK));
1167 break;
1168 case 6:
1169 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1170 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1171 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1172 NUM_BANKS(ADDR_SURF_8_BANK));
1173 break;
1174 case 8:
1175 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1178 NUM_BANKS(ADDR_SURF_16_BANK));
1179 break;
1180 case 9:
1181 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1182 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1183 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1184 NUM_BANKS(ADDR_SURF_16_BANK));
1185 break;
1186 case 10:
1187 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1190 NUM_BANKS(ADDR_SURF_16_BANK));
1191 break;
1192 case 11:
1193 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1194 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1195 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1196 NUM_BANKS(ADDR_SURF_16_BANK));
1197 break;
1198 case 12:
1199 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1202 NUM_BANKS(ADDR_SURF_16_BANK));
1203 break;
1204 case 13:
1205 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1206 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1207 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1208 NUM_BANKS(ADDR_SURF_16_BANK));
1209 break;
1210 case 14:
1211 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1214 NUM_BANKS(ADDR_SURF_8_BANK));
1215 break;
1216 default:
1217 gb_tile_moden = 0;
1218 break;
1219 }
1220 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1221 }
1222 } else
1223 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1224}
1225
1226/**
1227 * cik_select_se_sh - select which SE, SH to address
1228 *
1229 * @rdev: radeon_device pointer
1230 * @se_num: shader engine to address
1231 * @sh_num: sh block to address
1232 *
1233 * Select which SE, SH combinations to address. Certain
1234 * registers are instanced per SE or SH. 0xffffffff means
1235 * broadcast to all SEs or SHs (CIK).
1236 */
1237static void cik_select_se_sh(struct radeon_device *rdev,
1238 u32 se_num, u32 sh_num)
1239{
1240 u32 data = INSTANCE_BROADCAST_WRITES;
1241
1242 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1243 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1244 else if (se_num == 0xffffffff)
1245 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1246 else if (sh_num == 0xffffffff)
1247 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1248 else
1249 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1250 WREG32(GRBM_GFX_INDEX, data);
1251}
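/* Illustrative usage note (mirrors cik_setup_rb() further down, not an
 * addition to the programming model): cik_select_se_sh(rdev, i, j)
 * indexes a single SE/SH pair before its instanced registers are read,
 * and cik_select_se_sh(rdev, 0xffffffff, 0xffffffff) restores broadcast
 * writes to every instance afterwards.
 */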
1252
1253/**
1254 * cik_create_bitmask - create a bitmask
1255 *
1256 * @bit_width: length of the mask
1257 *
1258 * create a variable length bit mask (CIK).
1259 * Returns the bitmask.
1260 */
1261static u32 cik_create_bitmask(u32 bit_width)
1262{
1263 u32 i, mask = 0;
1264
1265 for (i = 0; i < bit_width; i++) {
1266 mask <<= 1;
1267 mask |= 1;
1268 }
1269 return mask;
1270}
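/* Worked example (derived from the loop above): cik_create_bitmask(4)
 * shifts in one set bit per iteration and returns 0xf, and a width of 0
 * returns 0. The caller below uses it to mask off the per-SH RB bits.
 */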
1271
1272/**
1273 * cik_get_rb_disabled - compute the mask of disabled RBs
1274 *
1275 * @rdev: radeon_device pointer
1276 * @max_rb_num: max RBs (render backends) for the asic
1277 * @se_num: number of SEs (shader engines) for the asic
1278 * @sh_per_se: number of SH blocks per SE for the asic
1279 *
1280 * Calculates the bitmask of disabled RBs (CIK).
1281 * Returns the disabled RB bitmask.
1282 */
1283static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1284 u32 max_rb_num, u32 se_num,
1285 u32 sh_per_se)
1286{
1287 u32 data, mask;
1288
1289 data = RREG32(CC_RB_BACKEND_DISABLE);
1290 if (data & 1)
1291 data &= BACKEND_DISABLE_MASK;
1292 else
1293 data = 0;
1294 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1295
1296 data >>= BACKEND_DISABLE_SHIFT;
1297
1298 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1299
1300 return data & mask;
1301}
1302
1303/**
1304 * cik_setup_rb - setup the RBs on the asic
1305 *
1306 * @rdev: radeon_device pointer
1307 * @se_num: number of SEs (shader engines) for the asic
1308 * @sh_per_se: number of SH blocks per SE for the asic
1309 * @max_rb_num: max RBs (render backends) for the asic
1310 *
1311 * Configures per-SE/SH RB registers (CIK).
1312 */
1313static void cik_setup_rb(struct radeon_device *rdev,
1314 u32 se_num, u32 sh_per_se,
1315 u32 max_rb_num)
1316{
1317 int i, j;
1318 u32 data, mask;
1319 u32 disabled_rbs = 0;
1320 u32 enabled_rbs = 0;
1321
1322 for (i = 0; i < se_num; i++) {
1323 for (j = 0; j < sh_per_se; j++) {
1324 cik_select_se_sh(rdev, i, j);
1325 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1326 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1327 }
1328 }
1329 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1330
1331 mask = 1;
1332 for (i = 0; i < max_rb_num; i++) {
1333 if (!(disabled_rbs & mask))
1334 enabled_rbs |= mask;
1335 mask <<= 1;
1336 }
1337
1338 for (i = 0; i < se_num; i++) {
1339 cik_select_se_sh(rdev, i, 0xffffffff);
1340 data = 0;
1341 for (j = 0; j < sh_per_se; j++) {
1342 switch (enabled_rbs & 3) {
1343 case 1:
1344 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1345 break;
1346 case 2:
1347 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1348 break;
1349 case 3:
1350 default:
1351 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1352 break;
1353 }
1354 enabled_rbs >>= 2;
1355 }
1356 WREG32(PA_SC_RASTER_CONFIG, data);
1357 }
1358 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1359}
1360
1361/**
1362 * cik_gpu_init - setup the 3D engine
1363 *
1364 * @rdev: radeon_device pointer
1365 *
1366 * Configures the 3D engine and tiling configuration
1367 * registers so that the 3D engine is usable.
1368 */
1369static void cik_gpu_init(struct radeon_device *rdev)
1370{
1371 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1372 u32 mc_shared_chmap, mc_arb_ramcfg;
1373 u32 hdp_host_path_cntl;
1374 u32 tmp;
1375 int i, j;
1376
1377 switch (rdev->family) {
1378 case CHIP_BONAIRE:
1379 rdev->config.cik.max_shader_engines = 2;
1380 rdev->config.cik.max_tile_pipes = 4;
1381 rdev->config.cik.max_cu_per_sh = 7;
1382 rdev->config.cik.max_sh_per_se = 1;
1383 rdev->config.cik.max_backends_per_se = 2;
1384 rdev->config.cik.max_texture_channel_caches = 4;
1385 rdev->config.cik.max_gprs = 256;
1386 rdev->config.cik.max_gs_threads = 32;
1387 rdev->config.cik.max_hw_contexts = 8;
1388
1389 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1390 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1391 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1392 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1393 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1394 break;
1395 case CHIP_KAVERI:
1396 /* TODO */
1397 break;
1398 case CHIP_KABINI:
1399 default:
1400 rdev->config.cik.max_shader_engines = 1;
1401 rdev->config.cik.max_tile_pipes = 2;
1402 rdev->config.cik.max_cu_per_sh = 2;
1403 rdev->config.cik.max_sh_per_se = 1;
1404 rdev->config.cik.max_backends_per_se = 1;
1405 rdev->config.cik.max_texture_channel_caches = 2;
1406 rdev->config.cik.max_gprs = 256;
1407 rdev->config.cik.max_gs_threads = 16;
1408 rdev->config.cik.max_hw_contexts = 8;
1409
1410 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1411 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1412 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1413 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1414 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1415 break;
1416 }
1417
1418 /* Initialize HDP */
1419 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1420 WREG32((0x2c14 + j), 0x00000000);
1421 WREG32((0x2c18 + j), 0x00000000);
1422 WREG32((0x2c1c + j), 0x00000000);
1423 WREG32((0x2c20 + j), 0x00000000);
1424 WREG32((0x2c24 + j), 0x00000000);
1425 }
1426
1427 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1428
1429 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1430
1431 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1432 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1433
1434 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1435 rdev->config.cik.mem_max_burst_length_bytes = 256;
1436 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1437 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1438 if (rdev->config.cik.mem_row_size_in_kb > 4)
1439 rdev->config.cik.mem_row_size_in_kb = 4;
1440 /* XXX use MC settings? */
1441 rdev->config.cik.shader_engine_tile_size = 32;
1442 rdev->config.cik.num_gpus = 1;
1443 rdev->config.cik.multi_gpu_tile_size = 64;
1444
1445 /* fix up row size */
1446 gb_addr_config &= ~ROW_SIZE_MASK;
1447 switch (rdev->config.cik.mem_row_size_in_kb) {
1448 case 1:
1449 default:
1450 gb_addr_config |= ROW_SIZE(0);
1451 break;
1452 case 2:
1453 gb_addr_config |= ROW_SIZE(1);
1454 break;
1455 case 4:
1456 gb_addr_config |= ROW_SIZE(2);
1457 break;
1458 }
1459
1460 /* setup tiling info dword. gb_addr_config is not adequate since it does
1461 * not have bank info, so create a custom tiling dword.
1462 * bits 3:0 num_pipes
1463 * bits 7:4 num_banks
1464 * bits 11:8 group_size
1465 * bits 15:12 row_size
1466 */
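/* Worked example (illustrative, based on the assignments below): a
 * 4-pipe part stores 2 in bits 3:0, a non-zero NOOFBANK field sets
 * bit 4, and the group_size and row_size fields of gb_addr_config are
 * copied into bits 11:8 and 15:12 respectively.
 */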
1467 rdev->config.cik.tile_config = 0;
1468 switch (rdev->config.cik.num_tile_pipes) {
1469 case 1:
1470 rdev->config.cik.tile_config |= (0 << 0);
1471 break;
1472 case 2:
1473 rdev->config.cik.tile_config |= (1 << 0);
1474 break;
1475 case 4:
1476 rdev->config.cik.tile_config |= (2 << 0);
1477 break;
1478 case 8:
1479 default:
1480 /* XXX what about 12? */
1481 rdev->config.cik.tile_config |= (3 << 0);
1482 break;
1483 }
1484 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1485 rdev->config.cik.tile_config |= 1 << 4;
1486 else
1487 rdev->config.cik.tile_config |= 0 << 4;
1488 rdev->config.cik.tile_config |=
1489 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1490 rdev->config.cik.tile_config |=
1491 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1492
1493 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1494 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1495 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1496 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1497 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1498 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1499 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1500 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1501
1502 cik_tiling_mode_table_init(rdev);
1503
1504 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1505 rdev->config.cik.max_sh_per_se,
1506 rdev->config.cik.max_backends_per_se);
1507
1508 /* set HW defaults for 3D engine */
1509 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1510
1511 WREG32(SX_DEBUG_1, 0x20);
1512
1513 WREG32(TA_CNTL_AUX, 0x00010000);
1514
1515 tmp = RREG32(SPI_CONFIG_CNTL);
1516 tmp |= 0x03000000;
1517 WREG32(SPI_CONFIG_CNTL, tmp);
1518
1519 WREG32(SQ_CONFIG, 1);
1520
1521 WREG32(DB_DEBUG, 0);
1522
1523 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1524 tmp |= 0x00000400;
1525 WREG32(DB_DEBUG2, tmp);
1526
1527 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1528 tmp |= 0x00020200;
1529 WREG32(DB_DEBUG3, tmp);
1530
1531 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1532 tmp |= 0x00018208;
1533 WREG32(CB_HW_CONTROL, tmp);
1534
1535 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1536
1537 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1538 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1539 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1540 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1541
1542 WREG32(VGT_NUM_INSTANCES, 1);
1543
1544 WREG32(CP_PERFMON_CNTL, 0);
1545
1546 WREG32(SQ_CONFIG, 0);
1547
1548 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1549 FORCE_EOV_MAX_REZ_CNT(255)));
1550
1551 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1552 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1553
1554 WREG32(VGT_GS_VERTEX_REUSE, 16);
1555 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1556
1557 tmp = RREG32(HDP_MISC_CNTL);
1558 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1559 WREG32(HDP_MISC_CNTL, tmp);
1560
1561 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1562 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1563
1564 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1565 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1566
1567 udelay(50);
1568}
1569
1570/*
1571 * GPU scratch registers helpers function.
1572 */
1573/**
1574 * cik_scratch_init - setup driver info for CP scratch regs
1575 *
1576 * @rdev: radeon_device pointer
1577 *
1578 * Set up the number and offset of the CP scratch registers.
1579 * NOTE: use of CP scratch registers is a legacy interface and
1580 * is not used by default on newer asics (r6xx+). On newer asics,
1581 * memory buffers are used for fences rather than scratch regs.
1582 */
1583static void cik_scratch_init(struct radeon_device *rdev)
1584{
1585 int i;
1586
1587 rdev->scratch.num_reg = 7;
1588 rdev->scratch.reg_base = SCRATCH_REG0;
1589 for (i = 0; i < rdev->scratch.num_reg; i++) {
1590 rdev->scratch.free[i] = true;
1591 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1592 }
1593}
1594
1595/**
1596 * cik_ring_test - basic gfx ring test
1597 *
1598 * @rdev: radeon_device pointer
1599 * @ring: radeon_ring structure holding ring information
1600 *
1601 * Allocate a scratch register and write to it using the gfx ring (CIK).
1602 * Provides a basic gfx ring test to verify that the ring is working.
1603 * Used by cik_cp_gfx_resume();
1604 * Returns 0 on success, error on failure.
1605 */
1606int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1607{
1608 uint32_t scratch;
1609 uint32_t tmp = 0;
1610 unsigned i;
1611 int r;
1612
1613 r = radeon_scratch_get(rdev, &scratch);
1614 if (r) {
1615 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1616 return r;
1617 }
1618 WREG32(scratch, 0xCAFEDEAD);
1619 r = radeon_ring_lock(rdev, ring, 3);
1620 if (r) {
1621 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1622 radeon_scratch_free(rdev, scratch);
1623 return r;
1624 }
1625 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1626 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1627 radeon_ring_write(ring, 0xDEADBEEF);
1628 radeon_ring_unlock_commit(rdev, ring);
1629 for (i = 0; i < rdev->usec_timeout; i++) {
1630 tmp = RREG32(scratch);
1631 if (tmp == 0xDEADBEEF)
1632 break;
1633 DRM_UDELAY(1);
1634 }
1635 if (i < rdev->usec_timeout) {
1636 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1637 } else {
1638 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1639 ring->idx, scratch, tmp);
1640 r = -EINVAL;
1641 }
1642 radeon_scratch_free(rdev, scratch);
1643 return r;
1644}
1645
1646/**
1647 * cik_fence_ring_emit - emit a fence on the gfx ring
1648 *
1649 * @rdev: radeon_device pointer
1650 * @fence: radeon fence object
1651 *
1652 * Emits a fence sequence number on the gfx ring and flushes
1653 * GPU caches.
1654 */
1655void cik_fence_ring_emit(struct radeon_device *rdev,
1656 struct radeon_fence *fence)
1657{
1658 struct radeon_ring *ring = &rdev->ring[fence->ring];
1659 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1660
1661 /* EVENT_WRITE_EOP - flush caches, send int */
1662 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1663 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1664 EOP_TC_ACTION_EN |
1665 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1666 EVENT_INDEX(5)));
1667 radeon_ring_write(ring, addr & 0xfffffffc);
1668 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1669 radeon_ring_write(ring, fence->seq);
1670 radeon_ring_write(ring, 0);
1671 /* HDP flush */
1672 /* We should be using the new WAIT_REG_MEM special op packet here
1673 * but it causes the CP to hang
1674 */
1675 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1676 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1677 WRITE_DATA_DST_SEL(0)));
1678 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1679 radeon_ring_write(ring, 0);
1680 radeon_ring_write(ring, 0);
1681}
1682
1683void cik_semaphore_ring_emit(struct radeon_device *rdev,
1684 struct radeon_ring *ring,
1685 struct radeon_semaphore *semaphore,
1686 bool emit_wait)
1687{
1688 uint64_t addr = semaphore->gpu_addr;
1689 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1690
1691 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1692 radeon_ring_write(ring, addr & 0xffffffff);
1693 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1694}
1695
1696/*
1697 * IB stuff
1698 */
1699/**
1700 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1701 *
1702 * @rdev: radeon_device pointer
1703 * @ib: radeon indirect buffer object
1704 *
1705 * Emits a DE (drawing engine) or CE (constant engine) IB
1706 * on the gfx ring. IBs are usually generated by userspace
1707 * acceleration drivers and submitted to the kernel for
1708 * scheduling on the ring. This function schedules the IB
1709 * on the gfx ring for execution by the GPU.
1710 */
1711void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1712{
1713 struct radeon_ring *ring = &rdev->ring[ib->ring];
1714 u32 header, control = INDIRECT_BUFFER_VALID;
1715
1716 if (ib->is_const_ib) {
1717 /* set switch buffer packet before const IB */
1718 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1719 radeon_ring_write(ring, 0);
1720
1721 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1722 } else {
1723 u32 next_rptr;
1724 if (ring->rptr_save_reg) {
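		/* next_rptr accounts for the 3 DW SET_UCONFIG_REG write
		 * emitted here plus the 4 DW INDIRECT_BUFFER packet emitted
		 * below, so the saved value points just past this IB's
		 * packets in the ring.
		 */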
1725 next_rptr = ring->wptr + 3 + 4;
1726 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1727 radeon_ring_write(ring, ((ring->rptr_save_reg -
1728 PACKET3_SET_UCONFIG_REG_START) >> 2));
1729 radeon_ring_write(ring, next_rptr);
1730 } else if (rdev->wb.enabled) {
1731 next_rptr = ring->wptr + 5 + 4;
1732 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1733 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1734 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1735 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1736 radeon_ring_write(ring, next_rptr);
1737 }
1738
1739 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1740 }
1741
1742 control |= ib->length_dw |
1743 (ib->vm ? (ib->vm->id << 24) : 0);
1744
1745 radeon_ring_write(ring, header);
1746 radeon_ring_write(ring,
1747#ifdef __BIG_ENDIAN
1748 (2 << 0) |
1749#endif
1750 (ib->gpu_addr & 0xFFFFFFFC));
1751 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1752 radeon_ring_write(ring, control);
1753}
1754
1755/**
1756 * cik_ib_test - basic gfx ring IB test
1757 *
1758 * @rdev: radeon_device pointer
1759 * @ring: radeon_ring structure holding ring information
1760 *
1761 * Allocate an IB and execute it on the gfx ring (CIK).
1762 * Provides a basic gfx ring test to verify that IBs are working.
1763 * Returns 0 on success, error on failure.
1764 */
1765int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1766{
1767 struct radeon_ib ib;
1768 uint32_t scratch;
1769 uint32_t tmp = 0;
1770 unsigned i;
1771 int r;
1772
1773 r = radeon_scratch_get(rdev, &scratch);
1774 if (r) {
1775 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1776 return r;
1777 }
1778 WREG32(scratch, 0xCAFEDEAD);
1779 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1780 if (r) {
1781 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1782 return r;
1783 }
1784 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1785 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1786 ib.ptr[2] = 0xDEADBEEF;
1787 ib.length_dw = 3;
1788 r = radeon_ib_schedule(rdev, &ib, NULL);
1789 if (r) {
1790 radeon_scratch_free(rdev, scratch);
1791 radeon_ib_free(rdev, &ib);
1792 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1793 return r;
1794 }
1795 r = radeon_fence_wait(ib.fence, false);
1796 if (r) {
1797 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
1798 return r;
1799 }
1800 for (i = 0; i < rdev->usec_timeout; i++) {
1801 tmp = RREG32(scratch);
1802 if (tmp == 0xDEADBEEF)
1803 break;
1804 DRM_UDELAY(1);
1805 }
1806 if (i < rdev->usec_timeout) {
1807 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1808 } else {
1809 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1810 scratch, tmp);
1811 r = -EINVAL;
1812 }
1813 radeon_scratch_free(rdev, scratch);
1814 radeon_ib_free(rdev, &ib);
1815 return r;
1816}
1817
1818/*
1819 * CP.
1820 * On CIK, gfx and compute now have independent command processors.
1821 *
1822 * GFX
1823 * Gfx consists of a single ring and can process both gfx jobs and
1824 * compute jobs. The gfx CP consists of three microengines (ME):
1825 * PFP - Pre-Fetch Parser
1826 * ME - Micro Engine
1827 * CE - Constant Engine
1828 * The PFP and ME make up what is considered the Drawing Engine (DE).
1829 * The CE is an asynchronous engine used for updating buffer descriptors
1830 * used by the DE so that they can be loaded into cache in parallel
1831 * while the DE is processing state update packets.
1832 *
1833 * Compute
1834 * The compute CP consists of two microengines (ME):
1835 * MEC1 - Compute MicroEngine 1
1836 * MEC2 - Compute MicroEngine 2
1837 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1838 * The queues are exposed to userspace and are programmed directly
1839 * by the compute runtime.
1840 */
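/* Rough queue math for the above (assuming both MECs are active, which
 * per cik_cp_compute_load_microcode() below only happens on KAVERI):
 * 2 MECs * 4 pipes * 8 queues = 64 compute queues; parts that only load
 * MEC1 ucode top out at 4 * 8 = 32.
 */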
1841/**
1842 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1843 *
1844 * @rdev: radeon_device pointer
1845 * @enable: enable or disable the MEs
1846 *
1847 * Halts or unhalts the gfx MEs.
1848 */
1849static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1850{
1851 if (enable)
1852 WREG32(CP_ME_CNTL, 0);
1853 else {
1854 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1855 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1856 }
1857 udelay(50);
1858}
1859
1860/**
1861 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1862 *
1863 * @rdev: radeon_device pointer
1864 *
1865 * Loads the gfx PFP, ME, and CE ucode.
1866 * Returns 0 for success, -EINVAL if the ucode is not available.
1867 */
1868static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1869{
1870 const __be32 *fw_data;
1871 int i;
1872
1873 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1874 return -EINVAL;
1875
1876 cik_cp_gfx_enable(rdev, false);
1877
1878 /* PFP */
1879 fw_data = (const __be32 *)rdev->pfp_fw->data;
1880 WREG32(CP_PFP_UCODE_ADDR, 0);
1881 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1882 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1883 WREG32(CP_PFP_UCODE_ADDR, 0);
1884
1885 /* CE */
1886 fw_data = (const __be32 *)rdev->ce_fw->data;
1887 WREG32(CP_CE_UCODE_ADDR, 0);
1888 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1889 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1890 WREG32(CP_CE_UCODE_ADDR, 0);
1891
1892 /* ME */
1893 fw_data = (const __be32 *)rdev->me_fw->data;
1894 WREG32(CP_ME_RAM_WADDR, 0);
1895 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1896 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1897 WREG32(CP_ME_RAM_WADDR, 0);
1898
1899 WREG32(CP_PFP_UCODE_ADDR, 0);
1900 WREG32(CP_CE_UCODE_ADDR, 0);
1901 WREG32(CP_ME_RAM_WADDR, 0);
1902 WREG32(CP_ME_RAM_RADDR, 0);
1903 return 0;
1904}
1905
1906/**
1907 * cik_cp_gfx_start - start the gfx ring
1908 *
1909 * @rdev: radeon_device pointer
1910 *
1911 * Enables the ring and loads the clear state context and other
1912 * packets required to init the ring.
1913 * Returns 0 for success, error for failure.
1914 */
1915static int cik_cp_gfx_start(struct radeon_device *rdev)
1916{
1917 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1918 int r, i;
1919
1920 /* init the CP */
1921 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1922 WREG32(CP_ENDIAN_SWAP, 0);
1923 WREG32(CP_DEVICE_ID, 1);
1924
1925 cik_cp_gfx_enable(rdev, true);
1926
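	/* The 17 extra dwords cover the packets wrapped around the clear
	 * state buffer below: SET_BASE (4) + PREAMBLE begin/end (2 + 2) +
	 * CONTEXT_CONTROL (3) + CLEAR_STATE (2) + SET_CONTEXT_REG (4).
	 */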
1927 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1928 if (r) {
1929 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1930 return r;
1931 }
1932
1933 /* init the CE partitions. CE only used for gfx on CIK */
1934 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1935 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1936 radeon_ring_write(ring, 0xc000);
1937 radeon_ring_write(ring, 0xc000);
1938
1939 /* setup clear context state */
1940 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1941 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1942
1943 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1944 radeon_ring_write(ring, 0x80000000);
1945 radeon_ring_write(ring, 0x80000000);
1946
1947 for (i = 0; i < cik_default_size; i++)
1948 radeon_ring_write(ring, cik_default_state[i]);
1949
1950 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1951 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1952
1953 /* set clear context state */
1954 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1955 radeon_ring_write(ring, 0);
1956
1957 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1958 radeon_ring_write(ring, 0x00000316);
1959 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1960 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1961
1962 radeon_ring_unlock_commit(rdev, ring);
1963
1964 return 0;
1965}
1966
1967/**
1968 * cik_cp_gfx_fini - stop the gfx ring
1969 *
1970 * @rdev: radeon_device pointer
1971 *
1972 * Stop the gfx ring and tear down the driver ring
1973 * info.
1974 */
1975static void cik_cp_gfx_fini(struct radeon_device *rdev)
1976{
1977 cik_cp_gfx_enable(rdev, false);
1978 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1979}
1980
1981/**
1982 * cik_cp_gfx_resume - setup the gfx ring buffer registers
1983 *
1984 * @rdev: radeon_device pointer
1985 *
1986 * Program the location and size of the gfx ring buffer
1987 * and test it to make sure it's working.
1988 * Returns 0 for success, error for failure.
1989 */
1990static int cik_cp_gfx_resume(struct radeon_device *rdev)
1991{
1992 struct radeon_ring *ring;
1993 u32 tmp;
1994 u32 rb_bufsz;
1995 u64 rb_addr;
1996 int r;
1997
1998 WREG32(CP_SEM_WAIT_TIMER, 0x0);
1999 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2000
2001 /* Set the write pointer delay */
2002 WREG32(CP_RB_WPTR_DELAY, 0);
2003
2004 /* set the RB to use vmid 0 */
2005 WREG32(CP_RB_VMID, 0);
2006
2007 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2008
2009 /* ring 0 - compute and gfx */
2010 /* Set ring buffer size */
2011 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
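	/* rb_bufsz is the log2 of the ring size in 8-byte units; the second
	 * drm_order() term presumably programs the rptr report block size
	 * (here one GPU page) into the upper bits of CP_RB0_CNTL.
	 */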
2012 rb_bufsz = drm_order(ring->ring_size / 8);
2013 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2014#ifdef __BIG_ENDIAN
2015 tmp |= BUF_SWAP_32BIT;
2016#endif
2017 WREG32(CP_RB0_CNTL, tmp);
2018
2019 /* Initialize the ring buffer's read and write pointers */
2020 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2021 ring->wptr = 0;
2022 WREG32(CP_RB0_WPTR, ring->wptr);
2023
2024	/* set the wb address whether it's enabled or not */
2025 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2026 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2027
2028 /* scratch register shadowing is no longer supported */
2029 WREG32(SCRATCH_UMSK, 0);
2030
2031 if (!rdev->wb.enabled)
2032 tmp |= RB_NO_UPDATE;
2033
2034 mdelay(1);
2035 WREG32(CP_RB0_CNTL, tmp);
2036
2037 rb_addr = ring->gpu_addr >> 8;
2038 WREG32(CP_RB0_BASE, rb_addr);
2039 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2040
2041 ring->rptr = RREG32(CP_RB0_RPTR);
2042
2043 /* start the ring */
2044 cik_cp_gfx_start(rdev);
2045 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2046 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2047 if (r) {
2048 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2049 return r;
2050 }
2051 return 0;
2052}
2053
2054/**
2055 * cik_cp_compute_enable - enable/disable the compute CP MEs
2056 *
2057 * @rdev: radeon_device pointer
2058 * @enable: enable or disable the MEs
2059 *
2060 * Halts or unhalts the compute MEs.
2061 */
2062static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2063{
2064 if (enable)
2065 WREG32(CP_MEC_CNTL, 0);
2066 else
2067 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2068 udelay(50);
2069}
2070
2071/**
2072 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2073 *
2074 * @rdev: radeon_device pointer
2075 *
2076 * Loads the compute MEC1&2 ucode.
2077 * Returns 0 for success, -EINVAL if the ucode is not available.
2078 */
2079static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2080{
2081 const __be32 *fw_data;
2082 int i;
2083
2084 if (!rdev->mec_fw)
2085 return -EINVAL;
2086
2087 cik_cp_compute_enable(rdev, false);
2088
2089 /* MEC1 */
2090 fw_data = (const __be32 *)rdev->mec_fw->data;
2091 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2092 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2093 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2094 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2095
2096 if (rdev->family == CHIP_KAVERI) {
2097 /* MEC2 */
2098 fw_data = (const __be32 *)rdev->mec_fw->data;
2099 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2100 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2101 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2102 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2103 }
2104
2105 return 0;
2106}
2107
2108/**
2109 * cik_cp_compute_start - start the compute queues
2110 *
2111 * @rdev: radeon_device pointer
2112 *
2113 * Enable the compute queues.
2114 * Returns 0 for success, error for failure.
2115 */
2116static int cik_cp_compute_start(struct radeon_device *rdev)
2117{
2118 //todo
2119 return 0;
2120}
2121
2122/**
2123 * cik_cp_compute_fini - stop the compute queues
2124 *
2125 * @rdev: radeon_device pointer
2126 *
2127 * Stop the compute queues and tear down the driver queue
2128 * info.
2129 */
2130static void cik_cp_compute_fini(struct radeon_device *rdev)
2131{
2132 cik_cp_compute_enable(rdev, false);
2133 //todo
2134}
2135
2136/**
2137 * cik_cp_compute_resume - setup the compute queue registers
2138 *
2139 * @rdev: radeon_device pointer
2140 *
2141 * Program the compute queues and test them to make sure they
2142 * are working.
2143 * Returns 0 for success, error for failure.
2144 */
2145static int cik_cp_compute_resume(struct radeon_device *rdev)
2146{
2147 int r;
2148
2149 //todo
2150 r = cik_cp_compute_start(rdev);
2151 if (r)
2152 return r;
2153 return 0;
2154}
2155
2156/* XXX temporary wrappers to handle both compute and gfx */
2157/* XXX */
2158static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2159{
2160 cik_cp_gfx_enable(rdev, enable);
2161 cik_cp_compute_enable(rdev, enable);
2162}
2163
2164/* XXX */
2165static int cik_cp_load_microcode(struct radeon_device *rdev)
2166{
2167 int r;
2168
2169 r = cik_cp_gfx_load_microcode(rdev);
2170 if (r)
2171 return r;
2172 r = cik_cp_compute_load_microcode(rdev);
2173 if (r)
2174 return r;
2175
2176 return 0;
2177}
2178
2179/* XXX */
2180static void cik_cp_fini(struct radeon_device *rdev)
2181{
2182 cik_cp_gfx_fini(rdev);
2183 cik_cp_compute_fini(rdev);
2184}
2185
2186/* XXX */
2187static int cik_cp_resume(struct radeon_device *rdev)
2188{
2189 int r;
2190
2191 /* Reset all cp blocks */
2192 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2193 RREG32(GRBM_SOFT_RESET);
2194 mdelay(15);
2195 WREG32(GRBM_SOFT_RESET, 0);
2196 RREG32(GRBM_SOFT_RESET);
2197
2198 r = cik_cp_load_microcode(rdev);
2199 if (r)
2200 return r;
2201
2202 r = cik_cp_gfx_resume(rdev);
2203 if (r)
2204 return r;
2205 r = cik_cp_compute_resume(rdev);
2206 if (r)
2207 return r;
2208
2209 return 0;
2210}
2211
2212/*
2213 * sDMA - System DMA
2214 * Starting with CIK, the GPU has new asynchronous
2215 * DMA engines. These engines are used for compute
2216 * and gfx. There are two DMA engines (SDMA0, SDMA1)
2217 * and each one supports 1 ring buffer used for gfx
2218 * and 2 queues used for compute.
2219 *
2220 * The programming model is very similar to the CP
2221 * (ring buffer, IBs, etc.), but sDMA has its own
2222 * packet format that is different from the PM4 format
2223 * used by the CP. sDMA supports copying data, writing
2224 * embedded data, solid fills, and a number of other
2225 * things. It also has support for tiling/detiling of
2226 * buffers.
2227 */
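/* In other words: two gfx DMA rings (R600_RING_TYPE_DMA_INDEX and
 * CAYMAN_RING_TYPE_DMA1_INDEX below) plus four compute queues spread
 * across SDMA0 and SDMA1; each engine's registers are reached via
 * SDMA0_REGISTER_OFFSET or SDMA1_REGISTER_OFFSET.
 */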
2228/**
2229 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2230 *
2231 * @rdev: radeon_device pointer
2232 * @ib: IB object to schedule
2233 *
2234 * Schedule an IB in the DMA ring (CIK).
2235 */
2236void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2237 struct radeon_ib *ib)
2238{
2239 struct radeon_ring *ring = &rdev->ring[ib->ring];
2240 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2241
2242 if (rdev->wb.enabled) {
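		/* next_rptr: 5 DWs for this write packet, rounded up to
		 * mirror the NOP padding added before the INDIRECT_BUFFER
		 * packet (which must start at (wptr & 7) == 4), plus 4 DWs
		 * for the INDIRECT_BUFFER packet itself.
		 */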
2243 u32 next_rptr = ring->wptr + 5;
2244 while ((next_rptr & 7) != 4)
2245 next_rptr++;
2246 next_rptr += 4;
2247 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2248 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2249 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2250 radeon_ring_write(ring, 1); /* number of DWs to follow */
2251 radeon_ring_write(ring, next_rptr);
2252 }
2253
2254	/* IB packet must end on an 8 DW boundary */
2255 while ((ring->wptr & 7) != 4)
2256 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2257 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2258 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2259 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2260 radeon_ring_write(ring, ib->length_dw);
2261
2262}
2263
2264/**
2265 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2266 *
2267 * @rdev: radeon_device pointer
2268 * @fence: radeon fence object
2269 *
2270 * Add a DMA fence packet to the ring to write
2271 * the fence seq number, and a DMA trap packet to generate
2272 * an interrupt if needed (CIK).
2273 */
2274void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2275 struct radeon_fence *fence)
2276{
2277 struct radeon_ring *ring = &rdev->ring[fence->ring];
2278 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2279 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2280 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2281 u32 ref_and_mask;
2282
2283 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2284 ref_and_mask = SDMA0;
2285 else
2286 ref_and_mask = SDMA1;
2287
2288 /* write the fence */
2289 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2290 radeon_ring_write(ring, addr & 0xffffffff);
2291 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2292 radeon_ring_write(ring, fence->seq);
2293 /* generate an interrupt */
2294 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2295 /* flush HDP */
2296 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2297 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2298 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2299 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2300 radeon_ring_write(ring, ref_and_mask); /* MASK */
2301 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2302}
2303
2304/**
2305 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2306 *
2307 * @rdev: radeon_device pointer
2308 * @ring: radeon_ring structure holding ring information
2309 * @semaphore: radeon semaphore object
2310 * @emit_wait: wait or signal semaphore
2311 *
2312 * Add a DMA semaphore packet to the ring to wait on or signal
2313 * other rings (CIK).
2314 */
2315void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2316 struct radeon_ring *ring,
2317 struct radeon_semaphore *semaphore,
2318 bool emit_wait)
2319{
2320 u64 addr = semaphore->gpu_addr;
2321 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2322
2323 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2324 radeon_ring_write(ring, addr & 0xfffffff8);
2325 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2326}
2327
2328/**
2329 * cik_sdma_gfx_stop - stop the gfx async dma engines
2330 *
2331 * @rdev: radeon_device pointer
2332 *
2333 * Stop the gfx async dma ring buffers (CIK).
2334 */
2335static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2336{
2337 u32 rb_cntl, reg_offset;
2338 int i;
2339
2340 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2341
2342 for (i = 0; i < 2; i++) {
2343 if (i == 0)
2344 reg_offset = SDMA0_REGISTER_OFFSET;
2345 else
2346 reg_offset = SDMA1_REGISTER_OFFSET;
2347 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2348 rb_cntl &= ~SDMA_RB_ENABLE;
2349 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2350 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2351 }
2352}
2353
2354/**
2355 * cik_sdma_rlc_stop - stop the compute async dma engines
2356 *
2357 * @rdev: radeon_device pointer
2358 *
2359 * Stop the compute async dma queues (CIK).
2360 */
2361static void cik_sdma_rlc_stop(struct radeon_device *rdev)
2362{
2363 /* XXX todo */
2364}
2365
2366/**
2367 * cik_sdma_enable - enable/disable the async dma engines
2368 *
2369 * @rdev: radeon_device pointer
2370 * @enable: enable/disable the DMA MEs.
2371 *
2372 * Halt or unhalt the async dma engines (CIK).
2373 */
2374static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2375{
2376 u32 me_cntl, reg_offset;
2377 int i;
2378
2379 for (i = 0; i < 2; i++) {
2380 if (i == 0)
2381 reg_offset = SDMA0_REGISTER_OFFSET;
2382 else
2383 reg_offset = SDMA1_REGISTER_OFFSET;
2384 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2385 if (enable)
2386 me_cntl &= ~SDMA_HALT;
2387 else
2388 me_cntl |= SDMA_HALT;
2389 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2390 }
2391}
2392
2393/**
2394 * cik_sdma_gfx_resume - setup and start the async dma engines
2395 *
2396 * @rdev: radeon_device pointer
2397 *
2398 * Set up the gfx DMA ring buffers and enable them (CIK).
2399 * Returns 0 for success, error for failure.
2400 */
2401static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2402{
2403 struct radeon_ring *ring;
2404 u32 rb_cntl, ib_cntl;
2405 u32 rb_bufsz;
2406 u32 reg_offset, wb_offset;
2407 int i, r;
2408
2409 for (i = 0; i < 2; i++) {
2410 if (i == 0) {
2411 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2412 reg_offset = SDMA0_REGISTER_OFFSET;
2413 wb_offset = R600_WB_DMA_RPTR_OFFSET;
2414 } else {
2415 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2416 reg_offset = SDMA1_REGISTER_OFFSET;
2417 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2418 }
2419
2420 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2421 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2422
2423 /* Set ring buffer size in dwords */
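		/* rb_bufsz is the log2 of the ring size in dwords; shifting it
		 * up by one presumably places it in the RB size field just
		 * above the SDMA_RB_ENABLE bit that gets or'ed in later.
		 */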
2424 rb_bufsz = drm_order(ring->ring_size / 4);
2425 rb_cntl = rb_bufsz << 1;
2426#ifdef __BIG_ENDIAN
2427 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2428#endif
2429 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2430
2431 /* Initialize the ring buffer's read and write pointers */
2432 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2433 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2434
2435 /* set the wb address whether it's enabled or not */
2436 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2437 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2438 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2439 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2440
2441 if (rdev->wb.enabled)
2442 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2443
2444 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2445 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2446
2447 ring->wptr = 0;
2448 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2449
2450 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2451
2452 /* enable DMA RB */
2453 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2454
2455 ib_cntl = SDMA_IB_ENABLE;
2456#ifdef __BIG_ENDIAN
2457 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2458#endif
2459 /* enable DMA IBs */
2460 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2461
2462 ring->ready = true;
2463
2464 r = radeon_ring_test(rdev, ring->idx, ring);
2465 if (r) {
2466 ring->ready = false;
2467 return r;
2468 }
2469 }
2470
2471 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2472
2473 return 0;
2474}
2475
2476/**
2477 * cik_sdma_rlc_resume - setup and start the async dma engines
2478 *
2479 * @rdev: radeon_device pointer
2480 *
2481 * Set up the compute DMA queues and enable them (CIK).
2482 * Returns 0 for success, error for failure.
2483 */
2484static int cik_sdma_rlc_resume(struct radeon_device *rdev)
2485{
2486 /* XXX todo */
2487 return 0;
2488}
2489
2490/**
2491 * cik_sdma_load_microcode - load the sDMA ME ucode
2492 *
2493 * @rdev: radeon_device pointer
2494 *
2495 * Loads the sDMA0/1 ucode.
2496 * Returns 0 for success, -EINVAL if the ucode is not available.
2497 */
2498static int cik_sdma_load_microcode(struct radeon_device *rdev)
2499{
2500 const __be32 *fw_data;
2501 int i;
2502
2503 if (!rdev->sdma_fw)
2504 return -EINVAL;
2505
2506 /* stop the gfx rings and rlc compute queues */
2507 cik_sdma_gfx_stop(rdev);
2508 cik_sdma_rlc_stop(rdev);
2509
2510 /* halt the MEs */
2511 cik_sdma_enable(rdev, false);
2512
2513 /* sdma0 */
2514 fw_data = (const __be32 *)rdev->sdma_fw->data;
2515 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2516 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2517 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2518 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2519
2520 /* sdma1 */
2521 fw_data = (const __be32 *)rdev->sdma_fw->data;
2522 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2523 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2524 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2525 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2526
2527 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2528 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2529 return 0;
2530}
2531
2532/**
2533 * cik_sdma_resume - setup and start the async dma engines
2534 *
2535 * @rdev: radeon_device pointer
2536 *
2537 * Set up the DMA engines and enable them (CIK).
2538 * Returns 0 for success, error for failure.
2539 */
2540static int cik_sdma_resume(struct radeon_device *rdev)
2541{
2542 int r;
2543
2544 /* Reset dma */
2545 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
2546 RREG32(SRBM_SOFT_RESET);
2547 udelay(50);
2548 WREG32(SRBM_SOFT_RESET, 0);
2549 RREG32(SRBM_SOFT_RESET);
2550
2551 r = cik_sdma_load_microcode(rdev);
2552 if (r)
2553 return r;
2554
2555 /* unhalt the MEs */
2556 cik_sdma_enable(rdev, true);
2557
2558 /* start the gfx rings and rlc compute queues */
2559 r = cik_sdma_gfx_resume(rdev);
2560 if (r)
2561 return r;
2562 r = cik_sdma_rlc_resume(rdev);
2563 if (r)
2564 return r;
2565
2566 return 0;
2567}
2568
2569/**
2570 * cik_sdma_fini - tear down the async dma engines
2571 *
2572 * @rdev: radeon_device pointer
2573 *
2574 * Stop the async dma engines and free the rings (CIK).
2575 */
2576static void cik_sdma_fini(struct radeon_device *rdev)
2577{
2578 /* stop the gfx rings and rlc compute queues */
2579 cik_sdma_gfx_stop(rdev);
2580 cik_sdma_rlc_stop(rdev);
2581 /* halt the MEs */
2582 cik_sdma_enable(rdev, false);
2583 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2584 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
2585 /* XXX - compute dma queue tear down */
2586}
2587
2588/**
2589 * cik_copy_dma - copy pages using the DMA engine
2590 *
2591 * @rdev: radeon_device pointer
2592 * @src_offset: src GPU address
2593 * @dst_offset: dst GPU address
2594 * @num_gpu_pages: number of GPU pages to xfer
2595 * @fence: radeon fence object
2596 *
2597 * Copy GPU pages using the DMA engine (CIK).
2598 * Used by the radeon ttm implementation to move pages if
2599 * registered as the asic copy callback.
2600 */
2601int cik_copy_dma(struct radeon_device *rdev,
2602 uint64_t src_offset, uint64_t dst_offset,
2603 unsigned num_gpu_pages,
2604 struct radeon_fence **fence)
2605{
2606 struct radeon_semaphore *sem = NULL;
2607 int ring_index = rdev->asic->copy.dma_ring_index;
2608 struct radeon_ring *ring = &rdev->ring[ring_index];
2609 u32 size_in_bytes, cur_size_in_bytes;
2610 int i, num_loops;
2611 int r = 0;
2612
2613 r = radeon_semaphore_create(rdev, &sem);
2614 if (r) {
2615 DRM_ERROR("radeon: moving bo (%d).\n", r);
2616 return r;
2617 }
2618
2619 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
2620 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
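	/* Dword budget: each COPY_LINEAR packet in the loop below is 7 DWs
	 * and moves at most 0x1fffff bytes; the extra 14 DWs cover the
	 * optional semaphore wait (3 DWs) and the fence/trap/HDP-flush
	 * sequence emitted by cik_sdma_fence_ring_emit() (11 DWs).
	 */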
2621 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
2622 if (r) {
2623 DRM_ERROR("radeon: moving bo (%d).\n", r);
2624 radeon_semaphore_free(rdev, &sem, NULL);
2625 return r;
2626 }
2627
2628 if (radeon_fence_need_sync(*fence, ring->idx)) {
2629 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
2630 ring->idx);
2631 radeon_fence_note_sync(*fence, ring->idx);
2632 } else {
2633 radeon_semaphore_free(rdev, &sem, NULL);
2634 }
2635
2636 for (i = 0; i < num_loops; i++) {
2637 cur_size_in_bytes = size_in_bytes;
2638 if (cur_size_in_bytes > 0x1fffff)
2639 cur_size_in_bytes = 0x1fffff;
2640 size_in_bytes -= cur_size_in_bytes;
2641 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
2642 radeon_ring_write(ring, cur_size_in_bytes);
2643 radeon_ring_write(ring, 0); /* src/dst endian swap */
2644 radeon_ring_write(ring, src_offset & 0xffffffff);
2645 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
2646 radeon_ring_write(ring, dst_offset & 0xfffffffc);
2647 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
2648 src_offset += cur_size_in_bytes;
2649 dst_offset += cur_size_in_bytes;
2650 }
2651
2652 r = radeon_fence_emit(rdev, fence, ring->idx);
2653 if (r) {
2654 radeon_ring_unlock_undo(rdev, ring);
2655 return r;
2656 }
2657
2658 radeon_ring_unlock_commit(rdev, ring);
2659 radeon_semaphore_free(rdev, &sem, *fence);
2660
2661 return r;
2662}
2663
2664/**
2665 * cik_sdma_ring_test - simple async dma engine test
2666 *
2667 * @rdev: radeon_device pointer
2668 * @ring: radeon_ring structure holding ring information
2669 *
2670 * Test the DMA engine by using it to write a value to
2671 * memory (CIK).
2672 * Returns 0 for success, error for failure.
2673 */
2674int cik_sdma_ring_test(struct radeon_device *rdev,
2675 struct radeon_ring *ring)
2676{
2677 unsigned i;
2678 int r;
2679 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2680 u32 tmp;
2681
2682 if (!ptr) {
2683 DRM_ERROR("invalid vram scratch pointer\n");
2684 return -EINVAL;
2685 }
2686
2687 tmp = 0xCAFEDEAD;
2688 writel(tmp, ptr);
2689
2690 r = radeon_ring_lock(rdev, ring, 4);
2691 if (r) {
2692 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2693 return r;
2694 }
2695 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2696 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2697 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
2698 radeon_ring_write(ring, 1); /* number of DWs to follow */
2699 radeon_ring_write(ring, 0xDEADBEEF);
2700 radeon_ring_unlock_commit(rdev, ring);
2701
2702 for (i = 0; i < rdev->usec_timeout; i++) {
2703 tmp = readl(ptr);
2704 if (tmp == 0xDEADBEEF)
2705 break;
2706 DRM_UDELAY(1);
2707 }
2708
2709 if (i < rdev->usec_timeout) {
2710 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2711 } else {
2712 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2713 ring->idx, tmp);
2714 r = -EINVAL;
2715 }
2716 return r;
2717}
2718
2719/**
2720 * cik_sdma_ib_test - test an IB on the DMA engine
2721 *
2722 * @rdev: radeon_device pointer
2723 * @ring: radeon_ring structure holding ring information
2724 *
2725 * Test a simple IB in the DMA ring (CIK).
2726 * Returns 0 on success, error on failure.
2727 */
2728int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2729{
2730 struct radeon_ib ib;
2731 unsigned i;
2732 int r;
2733 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2734 u32 tmp = 0;
2735
2736 if (!ptr) {
2737 DRM_ERROR("invalid vram scratch pointer\n");
2738 return -EINVAL;
2739 }
2740
2741 tmp = 0xCAFEDEAD;
2742 writel(tmp, ptr);
2743
2744 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2745 if (r) {
2746 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2747 return r;
2748 }
2749
2750 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2751 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
2752 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
2753 ib.ptr[3] = 1;
2754 ib.ptr[4] = 0xDEADBEEF;
2755 ib.length_dw = 5;
2756
2757 r = radeon_ib_schedule(rdev, &ib, NULL);
2758 if (r) {
2759 radeon_ib_free(rdev, &ib);
2760 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2761 return r;
2762 }
2763 r = radeon_fence_wait(ib.fence, false);
2764 if (r) {
2765 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2766 return r;
2767 }
2768 for (i = 0; i < rdev->usec_timeout; i++) {
2769 tmp = readl(ptr);
2770 if (tmp == 0xDEADBEEF)
2771 break;
2772 DRM_UDELAY(1);
2773 }
2774 if (i < rdev->usec_timeout) {
2775 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2776 } else {
2777 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
2778 r = -EINVAL;
2779 }
2780 radeon_ib_free(rdev, &ib);
2781 return r;
2782}
2783
2784
2785static void cik_print_gpu_status_regs(struct radeon_device *rdev)
2786{
2787 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2788 RREG32(GRBM_STATUS));
2789 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2790 RREG32(GRBM_STATUS2));
2791 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2792 RREG32(GRBM_STATUS_SE0));
2793 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2794 RREG32(GRBM_STATUS_SE1));
2795 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2796 RREG32(GRBM_STATUS_SE2));
2797 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2798 RREG32(GRBM_STATUS_SE3));
2799 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2800 RREG32(SRBM_STATUS));
2801 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2802 RREG32(SRBM_STATUS2));
2803 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
2804 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
2805 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
2806 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
2807}
2808
2809/**
2810 * cik_gpu_check_soft_reset - check which blocks are busy
2811 *
2812 * @rdev: radeon_device pointer
2813 *
2814 * Check which blocks are busy and return the relevant reset
2815 * mask to be used by cik_gpu_soft_reset().
2816 * Returns a mask of the blocks to be reset.
2817 */
2818static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
2819{
2820 u32 reset_mask = 0;
2821 u32 tmp;
2822
2823 /* GRBM_STATUS */
2824 tmp = RREG32(GRBM_STATUS);
2825 if (tmp & (PA_BUSY | SC_BUSY |
2826 BCI_BUSY | SX_BUSY |
2827 TA_BUSY | VGT_BUSY |
2828 DB_BUSY | CB_BUSY |
2829 GDS_BUSY | SPI_BUSY |
2830 IA_BUSY | IA_BUSY_NO_DMA))
2831 reset_mask |= RADEON_RESET_GFX;
2832
2833 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
2834 reset_mask |= RADEON_RESET_CP;
2835
2836 /* GRBM_STATUS2 */
2837 tmp = RREG32(GRBM_STATUS2);
2838 if (tmp & RLC_BUSY)
2839 reset_mask |= RADEON_RESET_RLC;
2840
2841 /* SDMA0_STATUS_REG */
2842 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
2843 if (!(tmp & SDMA_IDLE))
2844 reset_mask |= RADEON_RESET_DMA;
2845
2846 /* SDMA1_STATUS_REG */
2847 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
2848 if (!(tmp & SDMA_IDLE))
2849 reset_mask |= RADEON_RESET_DMA1;
2850
2851 /* SRBM_STATUS2 */
2852 tmp = RREG32(SRBM_STATUS2);
2853 if (tmp & SDMA_BUSY)
2854 reset_mask |= RADEON_RESET_DMA;
2855
2856 if (tmp & SDMA1_BUSY)
2857 reset_mask |= RADEON_RESET_DMA1;
2858
2859 /* SRBM_STATUS */
2860 tmp = RREG32(SRBM_STATUS);
2861
2862 if (tmp & IH_BUSY)
2863 reset_mask |= RADEON_RESET_IH;
2864
2865 if (tmp & SEM_BUSY)
2866 reset_mask |= RADEON_RESET_SEM;
2867
2868 if (tmp & GRBM_RQ_PENDING)
2869 reset_mask |= RADEON_RESET_GRBM;
2870
2871 if (tmp & VMC_BUSY)
2872 reset_mask |= RADEON_RESET_VMC;
2873
2874 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
2875 MCC_BUSY | MCD_BUSY))
2876 reset_mask |= RADEON_RESET_MC;
2877
2878 if (evergreen_is_display_hung(rdev))
2879 reset_mask |= RADEON_RESET_DISPLAY;
2880
2881	/* Skip MC reset as it's most likely not hung, just busy */
2882 if (reset_mask & RADEON_RESET_MC) {
2883 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
2884 reset_mask &= ~RADEON_RESET_MC;
2885 }
2886
2887 return reset_mask;
2888}
2889
2890/**
2891 * cik_gpu_soft_reset - soft reset GPU
2892 *
2893 * @rdev: radeon_device pointer
2894 * @reset_mask: mask of which blocks to reset
2895 *
2896 * Soft reset the blocks specified in @reset_mask.
2897 */
2898static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
2899{
2900 struct evergreen_mc_save save;
2901 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
2902 u32 tmp;
2903
2904 if (reset_mask == 0)
2905 return;
2906
2907 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
2908
2909 cik_print_gpu_status_regs(rdev);
2910 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
2911 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
2912 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
2913 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
2914
2915 /* stop the rlc */
2916 cik_rlc_stop(rdev);
2917
2918 /* Disable GFX parsing/prefetching */
2919 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2920
2921 /* Disable MEC parsing/prefetching */
2922 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2923
2924 if (reset_mask & RADEON_RESET_DMA) {
2925 /* sdma0 */
2926 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
2927 tmp |= SDMA_HALT;
2928 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
2929 }
2930 if (reset_mask & RADEON_RESET_DMA1) {
2931 /* sdma1 */
2932 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
2933 tmp |= SDMA_HALT;
2934 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
2935 }
2936
2937	evergreen_mc_stop(rdev, &save);
2938	if (evergreen_mc_wait_for_idle(rdev)) {
2939		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2940 }
2941
2942 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
2943 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
2944
2945 if (reset_mask & RADEON_RESET_CP) {
2946 grbm_soft_reset |= SOFT_RESET_CP;
2947
2948 srbm_soft_reset |= SOFT_RESET_GRBM;
2949 }
2950
2951 if (reset_mask & RADEON_RESET_DMA)
2952 srbm_soft_reset |= SOFT_RESET_SDMA;
2953
2954 if (reset_mask & RADEON_RESET_DMA1)
2955 srbm_soft_reset |= SOFT_RESET_SDMA1;
2956
2957 if (reset_mask & RADEON_RESET_DISPLAY)
2958 srbm_soft_reset |= SOFT_RESET_DC;
2959
2960 if (reset_mask & RADEON_RESET_RLC)
2961 grbm_soft_reset |= SOFT_RESET_RLC;
2962
2963 if (reset_mask & RADEON_RESET_SEM)
2964 srbm_soft_reset |= SOFT_RESET_SEM;
2965
2966 if (reset_mask & RADEON_RESET_IH)
2967 srbm_soft_reset |= SOFT_RESET_IH;
2968
2969 if (reset_mask & RADEON_RESET_GRBM)
2970 srbm_soft_reset |= SOFT_RESET_GRBM;
2971
2972 if (reset_mask & RADEON_RESET_VMC)
2973 srbm_soft_reset |= SOFT_RESET_VMC;
2974
2975 if (!(rdev->flags & RADEON_IS_IGP)) {
2976 if (reset_mask & RADEON_RESET_MC)
2977 srbm_soft_reset |= SOFT_RESET_MC;
2978 }
2979
2980 if (grbm_soft_reset) {
2981 tmp = RREG32(GRBM_SOFT_RESET);
2982 tmp |= grbm_soft_reset;
2983 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
2984 WREG32(GRBM_SOFT_RESET, tmp);
2985 tmp = RREG32(GRBM_SOFT_RESET);
2986
2987 udelay(50);
2988
2989 tmp &= ~grbm_soft_reset;
2990 WREG32(GRBM_SOFT_RESET, tmp);
2991 tmp = RREG32(GRBM_SOFT_RESET);
2992 }
2993
2994 if (srbm_soft_reset) {
2995 tmp = RREG32(SRBM_SOFT_RESET);
2996 tmp |= srbm_soft_reset;
2997 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
2998 WREG32(SRBM_SOFT_RESET, tmp);
2999 tmp = RREG32(SRBM_SOFT_RESET);
3000
3001 udelay(50);
3002
3003 tmp &= ~srbm_soft_reset;
3004 WREG32(SRBM_SOFT_RESET, tmp);
3005 tmp = RREG32(SRBM_SOFT_RESET);
3006 }
3007
3008 /* Wait a little for things to settle down */
3009 udelay(50);
3010
3011	evergreen_mc_resume(rdev, &save);
3012 udelay(50);
3013
3014 cik_print_gpu_status_regs(rdev);
3015}
3016
3017/**
3018 * cik_asic_reset - soft reset GPU
3019 *
3020 * @rdev: radeon_device pointer
3021 *
3022 * Look up which blocks are hung and attempt
3023 * to reset them.
3024 * Returns 0 for success.
3025 */
3026int cik_asic_reset(struct radeon_device *rdev)
3027{
3028	u32 reset_mask;
3029
3030 reset_mask = cik_gpu_check_soft_reset(rdev);
3031
3032 if (reset_mask)
3033 r600_set_bios_scratch_engine_hung(rdev, true);
3034
3035 cik_gpu_soft_reset(rdev, reset_mask);
3036
3037 reset_mask = cik_gpu_check_soft_reset(rdev);
3038
3039 if (!reset_mask)
3040 r600_set_bios_scratch_engine_hung(rdev, false);
3041
3042 return 0;
3043}
3044
3045/**
3046 * cik_gfx_is_lockup - check if the 3D engine is locked up
3047 *
3048 * @rdev: radeon_device pointer
3049 * @ring: radeon_ring structure holding ring information
3050 *
3051 * Check if the 3D engine is locked up (CIK).
3052 * Returns true if the engine is locked, false if not.
3053 */
3054bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3055{
3056 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3057
3058 if (!(reset_mask & (RADEON_RESET_GFX |
3059 RADEON_RESET_COMPUTE |
3060 RADEON_RESET_CP))) {
3061 radeon_ring_lockup_update(ring);
3062 return false;
3063 }
3064 /* force CP activities */
3065 radeon_ring_force_activity(rdev, ring);
3066 return radeon_ring_test_lockup(rdev, ring);
3067}
3068
3069/**
3070 * cik_sdma_is_lockup - Check if the DMA engine is locked up
3071 *
3072 * @rdev: radeon_device pointer
3073 * @ring: radeon_ring structure holding ring information
3074 *
3075 * Check if the async DMA engine is locked up (CIK).
3076 * Returns true if the engine appears to be locked up, false if not.
3077 */
3078bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3079{
3080 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3081 u32 mask;
3082
3083 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3084		mask = RADEON_RESET_DMA;
3085	else
3086 mask = RADEON_RESET_DMA1;
3087
3088 if (!(reset_mask & mask)) {
21a93e13
AD
3089 radeon_ring_lockup_update(ring);
3090 return false;
3091 }
3092 /* force ring activities */
3093 radeon_ring_force_activity(rdev, ring);
3094 return radeon_ring_test_lockup(rdev, ring);
3095}
3096
3097/* MC */
3098/**
3099 * cik_mc_program - program the GPU memory controller
3100 *
3101 * @rdev: radeon_device pointer
3102 *
3103 * Set the location of vram, gart, and AGP in the GPU's
3104 * physical address space (CIK).
3105 */
3106static void cik_mc_program(struct radeon_device *rdev)
3107{
3108 struct evergreen_mc_save save;
3109 u32 tmp;
3110 int i, j;
3111
3112 /* Initialize HDP */
3113 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3114 WREG32((0x2c14 + j), 0x00000000);
3115 WREG32((0x2c18 + j), 0x00000000);
3116 WREG32((0x2c1c + j), 0x00000000);
3117 WREG32((0x2c20 + j), 0x00000000);
3118 WREG32((0x2c24 + j), 0x00000000);
3119 }
3120 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3121
3122 evergreen_mc_stop(rdev, &save);
3123 if (radeon_mc_wait_for_idle(rdev)) {
3124		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3125 }
3126 /* Lockout access through VGA aperture*/
3127 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3128 /* Update configuration */
3129 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3130 rdev->mc.vram_start >> 12);
3131 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3132 rdev->mc.vram_end >> 12);
3133 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3134 rdev->vram_scratch.gpu_addr >> 12);
3135 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3136 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3137 WREG32(MC_VM_FB_LOCATION, tmp);
3138 /* XXX double check these! */
3139 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3140 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3141 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3142 WREG32(MC_VM_AGP_BASE, 0);
3143 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3144 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3145 if (radeon_mc_wait_for_idle(rdev)) {
3146		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3147 }
3148 evergreen_mc_resume(rdev, &save);
3149 /* we need to own VRAM, so turn off the VGA renderer here
3150 * to stop it overwriting our objects */
3151 rv515_vga_render_disable(rdev);
3152}
3153
3154/**
3155 * cik_mc_init - initialize the memory controller driver params
3156 *
3157 * @rdev: radeon_device pointer
3158 *
3159 * Look up the amount of vram, vram width, and decide how to place
3160 * vram and gart within the GPU's physical address space (CIK).
3161 * Returns 0 for success.
3162 */
3163static int cik_mc_init(struct radeon_device *rdev)
3164{
3165 u32 tmp;
3166 int chansize, numchan;
3167
3168	/* Get VRAM information */
3169 rdev->mc.vram_is_ddr = true;
3170 tmp = RREG32(MC_ARB_RAMCFG);
3171 if (tmp & CHANSIZE_MASK) {
3172 chansize = 64;
3173 } else {
3174 chansize = 32;
3175 }
3176 tmp = RREG32(MC_SHARED_CHMAP);
3177 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3178 case 0:
3179 default:
3180 numchan = 1;
3181 break;
3182 case 1:
3183 numchan = 2;
3184 break;
3185 case 2:
3186 numchan = 4;
3187 break;
3188 case 3:
3189 numchan = 8;
3190 break;
3191 case 4:
3192 numchan = 3;
3193 break;
3194 case 5:
3195 numchan = 6;
3196 break;
3197 case 6:
3198 numchan = 10;
3199 break;
3200 case 7:
3201 numchan = 12;
3202 break;
3203 case 8:
3204 numchan = 16;
3205 break;
3206 }
3207 rdev->mc.vram_width = numchan * chansize;
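	/* e.g. 4 channels x 64 bits gives a 256-bit VRAM interface */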
3208 /* Could aper size report 0 ? */
3209 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3210 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3211 /* size in MB on si */
3212 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3213 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3214 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3215 si_vram_gtt_location(rdev, &rdev->mc);
3216 radeon_update_bandwidth_info(rdev);
3217
3218 return 0;
3219}
3220
3221/*
3222 * GART
3223 * VMID 0 is the physical GPU addresses as used by the kernel.
3224 * VMIDs 1-15 are used for userspace clients and are handled
3225 * by the radeon vm/hsa code.
3226 */
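/* Note: cik_pcie_gart_enable() below initially points contexts 1-15 at
 * the same page table as VMID 0; cik_vm_flush() later rewrites each
 * context's PAGE_TABLE_BASE_ADDR with the client's page directory
 * (vm->pd_gpu_addr) when the VM is flushed on a ring.
 */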
3227/**
3228 * cik_pcie_gart_tlb_flush - gart tlb flush callback
3229 *
3230 * @rdev: radeon_device pointer
3231 *
3232 * Flush the TLB for the VMID 0 page table (CIK).
3233 */
3234void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3235{
3236 /* flush hdp cache */
3237 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3238
3239 /* bits 0-15 are the VM contexts0-15 */
3240 WREG32(VM_INVALIDATE_REQUEST, 0x1);
3241}
3242
3243/**
3244 * cik_pcie_gart_enable - gart enable
3245 *
3246 * @rdev: radeon_device pointer
3247 *
3248 * This sets up the TLBs, programs the page tables for VMID0,
3249 * sets up the hw for VMIDs 1-15 which are allocated on
3250 * demand, and sets up the global locations for the LDS, GDS,
3251 * and GPUVM for FSA64 clients (CIK).
3252 * Returns 0 for success, errors for failure.
3253 */
3254static int cik_pcie_gart_enable(struct radeon_device *rdev)
3255{
3256 int r, i;
3257
3258 if (rdev->gart.robj == NULL) {
3259 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3260 return -EINVAL;
3261 }
3262 r = radeon_gart_table_vram_pin(rdev);
3263 if (r)
3264 return r;
3265 radeon_gart_restore(rdev);
3266 /* Setup TLB control */
3267 WREG32(MC_VM_MX_L1_TLB_CNTL,
3268 (0xA << 7) |
3269 ENABLE_L1_TLB |
3270 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3271 ENABLE_ADVANCED_DRIVER_MODEL |
3272 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3273 /* Setup L2 cache */
3274 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3275 ENABLE_L2_FRAGMENT_PROCESSING |
3276 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3277 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3278 EFFECTIVE_L2_QUEUE_SIZE(7) |
3279 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3280 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3281 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3282 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3283 /* setup context0 */
3284 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3285 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3286 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3287 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3288 (u32)(rdev->dummy_page.addr >> 12));
3289 WREG32(VM_CONTEXT0_CNTL2, 0);
3290 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3291 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3292
3293 WREG32(0x15D4, 0);
3294 WREG32(0x15D8, 0);
3295 WREG32(0x15DC, 0);
3296
3297 /* empty context1-15 */
3298 /* FIXME start with 4G, once using 2 level pt switch to full
3299 * vm size space
3300 */
3301 /* set vm size, must be a multiple of 4 */
3302 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3303 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3304 for (i = 1; i < 16; i++) {
3305 if (i < 8)
3306 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3307 rdev->gart.table_addr >> 12);
3308 else
3309 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3310 rdev->gart.table_addr >> 12);
3311 }
3312
3313 /* enable context1-15 */
3314 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3315 (u32)(rdev->dummy_page.addr >> 12));
3316	WREG32(VM_CONTEXT1_CNTL2, 4);
3317	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3318 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3319 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3320 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3321 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3322 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3323 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3324 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3325 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3326 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3327 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3328 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3329 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3330
3331 /* TC cache setup ??? */
3332 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3333 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3334 WREG32(TC_CFG_L1_STORE_POLICY, 0);
3335
3336 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3337 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3338 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3339 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3340 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3341
3342 WREG32(TC_CFG_L1_VOLATILE, 0);
3343 WREG32(TC_CFG_L2_VOLATILE, 0);
3344
3345 if (rdev->family == CHIP_KAVERI) {
3346 u32 tmp = RREG32(CHUB_CONTROL);
3347 tmp &= ~BYPASS_VM;
3348 WREG32(CHUB_CONTROL, tmp);
3349 }
3350
3351 /* XXX SH_MEM regs */
3352 /* where to put LDS, scratch, GPUVM in FSA64 space */
3353 for (i = 0; i < 16; i++) {
3354 WREG32(SRBM_GFX_CNTL, VMID(i));
3355		/* CP and shaders */
3356 WREG32(SH_MEM_CONFIG, 0);
3357 WREG32(SH_MEM_APE1_BASE, 1);
3358 WREG32(SH_MEM_APE1_LIMIT, 0);
3359 WREG32(SH_MEM_BASES, 0);
3360 /* SDMA GFX */
3361 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3362 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3363 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3364 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3365 /* XXX SDMA RLC - todo */
3366 }
3367 WREG32(SRBM_GFX_CNTL, 0);
3368
3369 cik_pcie_gart_tlb_flush(rdev);
3370 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3371 (unsigned)(rdev->mc.gtt_size >> 20),
3372 (unsigned long long)rdev->gart.table_addr);
3373 rdev->gart.ready = true;
3374 return 0;
3375}
3376
3377/**
3378 * cik_pcie_gart_disable - gart disable
3379 *
3380 * @rdev: radeon_device pointer
3381 *
3382 * This disables all VM page tables (CIK).
3383 */
3384static void cik_pcie_gart_disable(struct radeon_device *rdev)
3385{
3386 /* Disable all tables */
3387 WREG32(VM_CONTEXT0_CNTL, 0);
3388 WREG32(VM_CONTEXT1_CNTL, 0);
3389 /* Setup TLB control */
3390 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3391 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3392 /* Setup L2 cache */
3393 WREG32(VM_L2_CNTL,
3394 ENABLE_L2_FRAGMENT_PROCESSING |
3395 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3396 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3397 EFFECTIVE_L2_QUEUE_SIZE(7) |
3398 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3399 WREG32(VM_L2_CNTL2, 0);
3400 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3401 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3402 radeon_gart_table_vram_unpin(rdev);
3403}
3404
3405/**
3406 * cik_pcie_gart_fini - vm fini callback
3407 *
3408 * @rdev: radeon_device pointer
3409 *
3410 * Tears down the driver GART/VM setup (CIK).
3411 */
3412static void cik_pcie_gart_fini(struct radeon_device *rdev)
3413{
3414 cik_pcie_gart_disable(rdev);
3415 radeon_gart_table_vram_free(rdev);
3416 radeon_gart_fini(rdev);
3417}
3418
3419/* vm parser */
3420/**
3421 * cik_ib_parse - vm ib_parse callback
3422 *
3423 * @rdev: radeon_device pointer
3424 * @ib: indirect buffer pointer
3425 *
3426 * CIK uses hw IB checking so this is a nop (CIK).
3427 */
3428int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3429{
3430 return 0;
3431}
3432
3433/*
3434 * vm
3435 * VMID 0 is the physical GPU addresses as used by the kernel.
3436 * VMIDs 1-15 are used for userspace clients and are handled
3437 * by the radeon vm/hsa code.
3438 */
3439/**
3440 * cik_vm_init - cik vm init callback
3441 *
3442 * @rdev: radeon_device pointer
3443 *
3444 * Inits cik specific vm parameters (number of VMs, base of vram for
3445 * VMIDs 1-15) (CIK).
3446 * Returns 0 for success.
3447 */
3448int cik_vm_init(struct radeon_device *rdev)
3449{
3450 /* number of VMs */
3451 rdev->vm_manager.nvm = 16;
3452 /* base offset of vram pages */
3453 if (rdev->flags & RADEON_IS_IGP) {
3454 u64 tmp = RREG32(MC_VM_FB_OFFSET);
3455 tmp <<= 22;
3456 rdev->vm_manager.vram_base_offset = tmp;
3457 } else
3458 rdev->vm_manager.vram_base_offset = 0;
3459
3460 return 0;
3461}
3462
3463/**
3464 * cik_vm_fini - cik vm fini callback
3465 *
3466 * @rdev: radeon_device pointer
3467 *
3468 * Tear down any asic specific VM setup (CIK).
3469 */
3470void cik_vm_fini(struct radeon_device *rdev)
3471{
3472}
3473
3474/**
3475 * cik_vm_flush - cik vm flush using the CP
3476 *
3477 * @rdev: radeon_device pointer
3478 *
3479 * Update the page table base and flush the VM TLB
3480 * using the CP (CIK).
3481 */
3482void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3483{
3484 struct radeon_ring *ring = &rdev->ring[ridx];
3485
3486 if (vm == NULL)
3487 return;
3488
3489 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3490 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3491 WRITE_DATA_DST_SEL(0)));
3492 if (vm->id < 8) {
3493 radeon_ring_write(ring,
3494 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3495 } else {
3496 radeon_ring_write(ring,
3497 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3498 }
3499 radeon_ring_write(ring, 0);
3500 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3501
3502 /* update SH_MEM_* regs */
3503 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3504 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3505 WRITE_DATA_DST_SEL(0)));
3506 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3507 radeon_ring_write(ring, 0);
3508 radeon_ring_write(ring, VMID(vm->id));
3509
3510 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
3511 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3512 WRITE_DATA_DST_SEL(0)));
3513 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3514 radeon_ring_write(ring, 0);
3515
3516 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
3517 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
3518 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
3519 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
3520
3521 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3522 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3523 WRITE_DATA_DST_SEL(0)));
3524 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3525 radeon_ring_write(ring, 0);
3526 radeon_ring_write(ring, VMID(0));
3527
3528 /* HDP flush */
3529 /* We should be using the WAIT_REG_MEM packet here like in
3530 * cik_fence_ring_emit(), but it causes the CP to hang in this
3531 * context...
3532 */
3533 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3534 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3535 WRITE_DATA_DST_SEL(0)));
3536 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3537 radeon_ring_write(ring, 0);
3538 radeon_ring_write(ring, 0);
3539
3540 /* bits 0-15 are the VM contexts0-15 */
3541 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3542 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3543 WRITE_DATA_DST_SEL(0)));
3544 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3545 radeon_ring_write(ring, 0);
3546 radeon_ring_write(ring, 1 << vm->id);
3547
3548 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3549 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3550 radeon_ring_write(ring, 0x0);
3551}
3552
3553/**
3554 * cik_vm_set_page - update the page tables using CP or sDMA
3555 *
3556 * @rdev: radeon_device pointer
3557 * @ib: indirect buffer to fill with commands
3558 * @pe: addr of the page entry
3559 * @addr: dst addr to write into pe
3560 * @count: number of page entries to update
3561 * @incr: increase next addr by incr bytes
3562 * @flags: access flags
3563 *
3564 * Update the page tables using CP or sDMA (CIK).
3565 */
3566void cik_vm_set_page(struct radeon_device *rdev,
3567 struct radeon_ib *ib,
3568 uint64_t pe,
3569 uint64_t addr, unsigned count,
3570 uint32_t incr, uint32_t flags)
3571{
3572 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3573 uint64_t value;
3574 unsigned ndw;
3575
3576 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3577 /* CP */
3578 while (count) {
3579 ndw = 2 + count * 2;
3580 if (ndw > 0x3FFE)
3581 ndw = 0x3FFE;
3582
3583 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3584 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3585 WRITE_DATA_DST_SEL(1));
3586 ib->ptr[ib->length_dw++] = pe;
3587 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3588 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3589 if (flags & RADEON_VM_PAGE_SYSTEM) {
3590 value = radeon_vm_map_gart(rdev, addr);
3591 value &= 0xFFFFFFFFFFFFF000ULL;
3592 } else if (flags & RADEON_VM_PAGE_VALID) {
3593 value = addr;
3594 } else {
3595 value = 0;
3596 }
3597 addr += incr;
3598 value |= r600_flags;
3599 ib->ptr[ib->length_dw++] = value;
3600 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3601 }
3602 }
3603 } else {
3604 /* DMA */
3605 if (flags & RADEON_VM_PAGE_SYSTEM) {
3606 while (count) {
3607 ndw = count * 2;
3608 if (ndw > 0xFFFFE)
3609 ndw = 0xFFFFE;
3610
3611 /* for non-physically contiguous pages (system) */
3612 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3613 ib->ptr[ib->length_dw++] = pe;
3614 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3615 ib->ptr[ib->length_dw++] = ndw;
3616 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3617 if (flags & RADEON_VM_PAGE_SYSTEM) {
3618 value = radeon_vm_map_gart(rdev, addr);
3619 value &= 0xFFFFFFFFFFFFF000ULL;
3620 } else if (flags & RADEON_VM_PAGE_VALID) {
3621 value = addr;
3622 } else {
3623 value = 0;
3624 }
3625 addr += incr;
3626 value |= r600_flags;
3627 ib->ptr[ib->length_dw++] = value;
3628 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3629 }
3630 }
3631 } else {
3632 while (count) {
3633 ndw = count;
3634 if (ndw > 0x7FFFF)
3635 ndw = 0x7FFFF;
3636
3637 if (flags & RADEON_VM_PAGE_VALID)
3638 value = addr;
3639 else
3640 value = 0;
3641 /* for physically contiguous pages (vram) */
3642 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
3643 ib->ptr[ib->length_dw++] = pe; /* dst addr */
3644 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3645 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3646 ib->ptr[ib->length_dw++] = 0;
3647 ib->ptr[ib->length_dw++] = value; /* value */
3648 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3649 ib->ptr[ib->length_dw++] = incr; /* increment size */
3650 ib->ptr[ib->length_dw++] = 0;
3651 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
3652 pe += ndw * 8;
3653 addr += ndw * incr;
3654 count -= ndw;
3655 }
3656 }
3657 while (ib->length_dw & 0x7)
3658 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
3659 }
3660}
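/* Illustrative sketch (not driver code): dword cost of the physically
 * contiguous (GENERATE_PTE_PDE) path in cik_vm_set_page() above.  Each
 * chunk of up to 0x7FFFF entries costs a fixed 10 dwords, and the IB is
 * then NOP-padded to an 8-dword boundary, mirroring the loop above.
 */
#include <stdint.h>

static uint32_t cik_pte_pde_ib_dwords_sketch(uint32_t count)
{
	uint32_t dw = 0;

	while (count) {
		uint32_t ndw = (count > 0x7FFFF) ? 0x7FFFF : count;

		dw += 10;		/* one GENERATE_PTE_PDE packet */
		count -= ndw;
	}
	while (dw & 0x7)
		dw++;			/* NOP padding, as at the end of the function */
	return dw;
}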
3661
3662/**
3663 * cik_dma_vm_flush - cik vm flush using sDMA
3664 *
3665 * @rdev: radeon_device pointer
3666 *
3667 * Update the page table base and flush the VM TLB
3668 * using sDMA (CIK).
3669 */
3670void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3671{
3672 struct radeon_ring *ring = &rdev->ring[ridx];
3673 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3674 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3675 u32 ref_and_mask;
3676
3677 if (vm == NULL)
3678 return;
3679
3680 if (ridx == R600_RING_TYPE_DMA_INDEX)
3681 ref_and_mask = SDMA0;
3682 else
3683 ref_and_mask = SDMA1;
3684
3685 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3686 if (vm->id < 8) {
3687 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3688 } else {
3689 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3690 }
3691 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3692
3693 /* update SH_MEM_* regs */
3694 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3695 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3696 radeon_ring_write(ring, VMID(vm->id));
3697
3698 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3699 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3700 radeon_ring_write(ring, 0);
3701
3702 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3703 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
3704 radeon_ring_write(ring, 0);
3705
3706 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3707 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
3708 radeon_ring_write(ring, 1);
3709
3710 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3711 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
3712 radeon_ring_write(ring, 0);
3713
3714 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3715 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3716 radeon_ring_write(ring, VMID(0));
3717
3718 /* flush HDP */
3719 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3720 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3721 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3722 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3723 radeon_ring_write(ring, ref_and_mask); /* MASK */
3724 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3725
3726 /* flush TLB */
3727 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3728 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3729 radeon_ring_write(ring, 1 << vm->id);
3730}
3731
3732/*
3733 * RLC
3734 * The RLC is a multi-purpose microengine that handles a
3735 * variety of functions, the most important of which is
3736 * the interrupt controller.
3737 */
3738/**
3739 * cik_rlc_stop - stop the RLC ME
3740 *
3741 * @rdev: radeon_device pointer
3742 *
3743 * Halt the RLC ME (MicroEngine) (CIK).
3744 */
3745static void cik_rlc_stop(struct radeon_device *rdev)
3746{
3747 int i, j, k;
3748 u32 mask, tmp;
3749
3750 tmp = RREG32(CP_INT_CNTL_RING0);
3751 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3752 WREG32(CP_INT_CNTL_RING0, tmp);
3753
3754 RREG32(CB_CGTT_SCLK_CTRL);
3755 RREG32(CB_CGTT_SCLK_CTRL);
3756 RREG32(CB_CGTT_SCLK_CTRL);
3757 RREG32(CB_CGTT_SCLK_CTRL);
3758
3759 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3760 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
3761
3762 WREG32(RLC_CNTL, 0);
3763
3764 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3765 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3766 cik_select_se_sh(rdev, i, j);
3767 for (k = 0; k < rdev->usec_timeout; k++) {
3768 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
3769 break;
3770 udelay(1);
3771 }
3772 }
3773 }
3774 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3775
3776 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
3777 for (k = 0; k < rdev->usec_timeout; k++) {
3778 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3779 break;
3780 udelay(1);
3781 }
3782}
3783
3784/**
3785 * cik_rlc_start - start the RLC ME
3786 *
3787 * @rdev: radeon_device pointer
3788 *
3789 * Unhalt the RLC ME (MicroEngine) (CIK).
3790 */
3791static void cik_rlc_start(struct radeon_device *rdev)
3792{
3793 u32 tmp;
3794
3795 WREG32(RLC_CNTL, RLC_ENABLE);
3796
3797 tmp = RREG32(CP_INT_CNTL_RING0);
3798 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3799 WREG32(CP_INT_CNTL_RING0, tmp);
3800
3801 udelay(50);
3802}
3803
3804/**
3805 * cik_rlc_resume - setup the RLC hw
3806 *
3807 * @rdev: radeon_device pointer
3808 *
3809 * Initialize the RLC registers, load the ucode,
3810 * and start the RLC (CIK).
3811 * Returns 0 for success, -EINVAL if the ucode is not available.
3812 */
3813static int cik_rlc_resume(struct radeon_device *rdev)
3814{
3815 u32 i, size;
3816 u32 clear_state_info[3];
3817 const __be32 *fw_data;
3818
3819 if (!rdev->rlc_fw)
3820 return -EINVAL;
3821
3822 switch (rdev->family) {
3823 case CHIP_BONAIRE:
3824 default:
3825 size = BONAIRE_RLC_UCODE_SIZE;
3826 break;
3827 case CHIP_KAVERI:
3828 size = KV_RLC_UCODE_SIZE;
3829 break;
3830 case CHIP_KABINI:
3831 size = KB_RLC_UCODE_SIZE;
3832 break;
3833 }
3834
3835 cik_rlc_stop(rdev);
3836
3837 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
3838 RREG32(GRBM_SOFT_RESET);
3839 udelay(50);
3840 WREG32(GRBM_SOFT_RESET, 0);
3841 RREG32(GRBM_SOFT_RESET);
3842 udelay(50);
3843
3844 WREG32(RLC_LB_CNTR_INIT, 0);
3845 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
3846
3847 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3848 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
3849 WREG32(RLC_LB_PARAMS, 0x00600408);
3850 WREG32(RLC_LB_CNTL, 0x80000004);
3851
3852 WREG32(RLC_MC_CNTL, 0);
3853 WREG32(RLC_UCODE_CNTL, 0);
3854
3855 fw_data = (const __be32 *)rdev->rlc_fw->data;
3856 WREG32(RLC_GPM_UCODE_ADDR, 0);
3857 for (i = 0; i < size; i++)
3858 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
3859 WREG32(RLC_GPM_UCODE_ADDR, 0);
3860
3861 /* XXX */
3862 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
3863 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
3864 clear_state_info[2] = 0;//cik_default_size;
3865 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
3866 for (i = 0; i < 3; i++)
3867 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
3868 WREG32(RLC_DRIVER_DMA_STATUS, 0);
3869
3870 cik_rlc_start(rdev);
3871
3872 return 0;
3873}
3874
3875/*
3876 * Interrupts
3877 * Starting with r6xx, interrupts are handled via a ring buffer.
3878 * Ring buffers are areas of GPU accessible memory that the GPU
3879 * writes interrupt vectors into and the host reads vectors out of.
3880 * There is a rptr (read pointer) that determines where the
3881 * host is currently reading, and a wptr (write pointer)
3882 * which determines where the GPU has written. When the
3883 * pointers are equal, the ring is idle. When the GPU
3884 * writes vectors to the ring buffer, it increments the
3885 * wptr. When there is an interrupt, the host then starts
3886 * fetching vectors and processing them until the pointers are
3887 * equal again at which point it updates the rptr.
3888 */
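/* Conceptual host-side consumption loop for the ring described above; a
 * standalone sketch with hypothetical types, not the driver's code (the
 * real implementation is cik_irq_process() further down).  Entries are 16
 * bytes and rptr/wptr are byte offsets masked to the ring size.
 */
#include <stdint.h>

struct ih_ring_sketch {
	const volatile uint32_t *ring;	/* ring buffer viewed as 32-bit words */
	uint32_t ptr_mask;		/* ring size in bytes minus 1 (power of two) */
};

static uint32_t ih_consume_sketch(const struct ih_ring_sketch *ih,
				  uint32_t rptr, uint32_t wptr,
				  void (*handle)(uint32_t src_id, uint32_t src_data))
{
	while (rptr != wptr) {
		uint32_t idx = rptr / 4;

		handle(ih->ring[idx] & 0xff,		/* source id */
		       ih->ring[idx + 1] & 0x0fffffff);	/* source data */
		rptr = (rptr + 16) & ih->ptr_mask;	/* advance to next 128-bit vector */
	}
	return rptr;	/* the driver writes this back to IH_RB_RPTR */
}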
3889
3890/**
3891 * cik_enable_interrupts - Enable the interrupt ring buffer
3892 *
3893 * @rdev: radeon_device pointer
3894 *
3895 * Enable the interrupt ring buffer (CIK).
3896 */
3897static void cik_enable_interrupts(struct radeon_device *rdev)
3898{
3899 u32 ih_cntl = RREG32(IH_CNTL);
3900 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3901
3902 ih_cntl |= ENABLE_INTR;
3903 ih_rb_cntl |= IH_RB_ENABLE;
3904 WREG32(IH_CNTL, ih_cntl);
3905 WREG32(IH_RB_CNTL, ih_rb_cntl);
3906 rdev->ih.enabled = true;
3907}
3908
3909/**
3910 * cik_disable_interrupts - Disable the interrupt ring buffer
3911 *
3912 * @rdev: radeon_device pointer
3913 *
3914 * Disable the interrupt ring buffer (CIK).
3915 */
3916static void cik_disable_interrupts(struct radeon_device *rdev)
3917{
3918 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3919 u32 ih_cntl = RREG32(IH_CNTL);
3920
3921 ih_rb_cntl &= ~IH_RB_ENABLE;
3922 ih_cntl &= ~ENABLE_INTR;
3923 WREG32(IH_RB_CNTL, ih_rb_cntl);
3924 WREG32(IH_CNTL, ih_cntl);
3925 /* set rptr, wptr to 0 */
3926 WREG32(IH_RB_RPTR, 0);
3927 WREG32(IH_RB_WPTR, 0);
3928 rdev->ih.enabled = false;
3929 rdev->ih.rptr = 0;
3930}
3931
3932/**
3933 * cik_disable_interrupt_state - Disable all interrupt sources
3934 *
3935 * @rdev: radeon_device pointer
3936 *
3937 * Clear all interrupt enable bits used by the driver (CIK).
3938 */
3939static void cik_disable_interrupt_state(struct radeon_device *rdev)
3940{
3941 u32 tmp;
3942
3943 /* gfx ring */
3944 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3945 /* sdma */
3946 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3947 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3948 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3949 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3950 /* compute queues */
3951 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
3952 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
3953 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
3954 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
3955 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
3956 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
3957 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
3958 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
3959 /* grbm */
3960 WREG32(GRBM_INT_CNTL, 0);
3961 /* vline/vblank, etc. */
3962 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3963 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3964 if (rdev->num_crtc >= 4) {
3965 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3966 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3967 }
3968 if (rdev->num_crtc >= 6) {
3969 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3970 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3971 }
3972
3973 /* dac hotplug */
3974 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
3975
3976 /* digital hotplug */
3977 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3978 WREG32(DC_HPD1_INT_CONTROL, tmp);
3979 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3980 WREG32(DC_HPD2_INT_CONTROL, tmp);
3981 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3982 WREG32(DC_HPD3_INT_CONTROL, tmp);
3983 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3984 WREG32(DC_HPD4_INT_CONTROL, tmp);
3985 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3986 WREG32(DC_HPD5_INT_CONTROL, tmp);
3987 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3988 WREG32(DC_HPD6_INT_CONTROL, tmp);
3989
3990}
3991
3992/**
3993 * cik_irq_init - init and enable the interrupt ring
3994 *
3995 * @rdev: radeon_device pointer
3996 *
3997 * Allocate a ring buffer for the interrupt controller,
3998 * enable the RLC, disable interrupts, set up the IH
3999 * ring buffer, and enable it (CIK).
4000 * Called at device load and resume.
4001 * Returns 0 for success, errors for failure.
4002 */
4003static int cik_irq_init(struct radeon_device *rdev)
4004{
4005 int ret = 0;
4006 int rb_bufsz;
4007 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4008
4009 /* allocate ring */
4010 ret = r600_ih_ring_alloc(rdev);
4011 if (ret)
4012 return ret;
4013
4014 /* disable irqs */
4015 cik_disable_interrupts(rdev);
4016
4017 /* init rlc */
4018 ret = cik_rlc_resume(rdev);
4019 if (ret) {
4020 r600_ih_ring_fini(rdev);
4021 return ret;
4022 }
4023
4024 /* setup interrupt control */
4025 /* XXX this should actually be a bus address, not an MC address. same on older asics */
4026 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4027 interrupt_cntl = RREG32(INTERRUPT_CNTL);
4028 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4029 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4030 */
4031 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4032 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4033 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4034 WREG32(INTERRUPT_CNTL, interrupt_cntl);
4035
4036 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
4037 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4038
4039 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4040 IH_WPTR_OVERFLOW_CLEAR |
4041 (rb_bufsz << 1));
4042
4043 if (rdev->wb.enabled)
4044 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4045
4046 /* set the writeback address whether it's enabled or not */
4047 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4048 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4049
4050 WREG32(IH_RB_CNTL, ih_rb_cntl);
4051
4052 /* set rptr, wptr to 0 */
4053 WREG32(IH_RB_RPTR, 0);
4054 WREG32(IH_RB_WPTR, 0);
4055
4056 /* Default settings for IH_CNTL (disabled at first) */
4057 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4058 /* RPTR_REARM only works if msi's are enabled */
4059 if (rdev->msi_enabled)
4060 ih_cntl |= RPTR_REARM;
4061 WREG32(IH_CNTL, ih_cntl);
4062
4063 /* force the active interrupt state to all disabled */
4064 cik_disable_interrupt_state(rdev);
4065
4066 pci_set_master(rdev->pdev);
4067
4068 /* enable irqs */
4069 cik_enable_interrupts(rdev);
4070
4071 return ret;
4072}
4073
4074/**
4075 * cik_irq_set - enable/disable interrupt sources
4076 *
4077 * @rdev: radeon_device pointer
4078 *
4079 * Enable interrupt sources on the GPU (vblanks, hpd,
4080 * etc.) (CIK).
4081 * Returns 0 for success, errors for failure.
4082 */
4083int cik_irq_set(struct radeon_device *rdev)
4084{
4085 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
4086 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
4087 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4088 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4089 u32 grbm_int_cntl = 0;
4090	u32 dma_cntl, dma_cntl1;
4091
4092 if (!rdev->irq.installed) {
4093 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4094 return -EINVAL;
4095 }
4096 /* don't enable anything if the ih is disabled */
4097 if (!rdev->ih.enabled) {
4098 cik_disable_interrupts(rdev);
4099 /* force the active interrupt state to all disabled */
4100 cik_disable_interrupt_state(rdev);
4101 return 0;
4102 }
4103
4104 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4105 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4106 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4107 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4108 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4109 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4110
4111 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4112 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4113
4114 /* enable CP interrupts on all rings */
4115 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4116 DRM_DEBUG("cik_irq_set: sw int gfx\n");
4117 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4118 }
4119 /* TODO: compute queues! */
4120 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
4121
4122 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4123 DRM_DEBUG("cik_irq_set: sw int dma\n");
4124 dma_cntl |= TRAP_ENABLE;
4125 }
4126
4127 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4128 DRM_DEBUG("cik_irq_set: sw int dma1\n");
4129 dma_cntl1 |= TRAP_ENABLE;
4130 }
4131
4132 if (rdev->irq.crtc_vblank_int[0] ||
4133 atomic_read(&rdev->irq.pflip[0])) {
4134 DRM_DEBUG("cik_irq_set: vblank 0\n");
4135 crtc1 |= VBLANK_INTERRUPT_MASK;
4136 }
4137 if (rdev->irq.crtc_vblank_int[1] ||
4138 atomic_read(&rdev->irq.pflip[1])) {
4139 DRM_DEBUG("cik_irq_set: vblank 1\n");
4140 crtc2 |= VBLANK_INTERRUPT_MASK;
4141 }
4142 if (rdev->irq.crtc_vblank_int[2] ||
4143 atomic_read(&rdev->irq.pflip[2])) {
4144 DRM_DEBUG("cik_irq_set: vblank 2\n");
4145 crtc3 |= VBLANK_INTERRUPT_MASK;
4146 }
4147 if (rdev->irq.crtc_vblank_int[3] ||
4148 atomic_read(&rdev->irq.pflip[3])) {
4149 DRM_DEBUG("cik_irq_set: vblank 3\n");
4150 crtc4 |= VBLANK_INTERRUPT_MASK;
4151 }
4152 if (rdev->irq.crtc_vblank_int[4] ||
4153 atomic_read(&rdev->irq.pflip[4])) {
4154 DRM_DEBUG("cik_irq_set: vblank 4\n");
4155 crtc5 |= VBLANK_INTERRUPT_MASK;
4156 }
4157 if (rdev->irq.crtc_vblank_int[5] ||
4158 atomic_read(&rdev->irq.pflip[5])) {
4159 DRM_DEBUG("cik_irq_set: vblank 5\n");
4160 crtc6 |= VBLANK_INTERRUPT_MASK;
4161 }
4162 if (rdev->irq.hpd[0]) {
4163 DRM_DEBUG("cik_irq_set: hpd 1\n");
4164 hpd1 |= DC_HPDx_INT_EN;
4165 }
4166 if (rdev->irq.hpd[1]) {
4167 DRM_DEBUG("cik_irq_set: hpd 2\n");
4168 hpd2 |= DC_HPDx_INT_EN;
4169 }
4170 if (rdev->irq.hpd[2]) {
4171 DRM_DEBUG("cik_irq_set: hpd 3\n");
4172 hpd3 |= DC_HPDx_INT_EN;
4173 }
4174 if (rdev->irq.hpd[3]) {
4175 DRM_DEBUG("cik_irq_set: hpd 4\n");
4176 hpd4 |= DC_HPDx_INT_EN;
4177 }
4178 if (rdev->irq.hpd[4]) {
4179 DRM_DEBUG("cik_irq_set: hpd 5\n");
4180 hpd5 |= DC_HPDx_INT_EN;
4181 }
4182 if (rdev->irq.hpd[5]) {
4183 DRM_DEBUG("cik_irq_set: hpd 6\n");
4184 hpd6 |= DC_HPDx_INT_EN;
4185 }
4186
4187 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4188
4189 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4190 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4191
4192 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4193
4194 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4195 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4196 if (rdev->num_crtc >= 4) {
4197 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4198 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4199 }
4200 if (rdev->num_crtc >= 6) {
4201 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4202 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4203 }
4204
4205 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4206 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4207 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4208 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4209 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4210 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4211
4212 return 0;
4213}
4214
4215/**
4216 * cik_irq_ack - ack interrupt sources
4217 *
4218 * @rdev: radeon_device pointer
4219 *
4220 * Ack interrupt sources on the GPU (vblanks, hpd,
4221 * etc.) (CIK). Certain interrupt sources are sw
4222 * generated and do not require an explicit ack.
4223 */
4224static inline void cik_irq_ack(struct radeon_device *rdev)
4225{
4226 u32 tmp;
4227
4228 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4229 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4230 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4231 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4232 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4233 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4234 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4235
4236 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4237 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4238 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4239 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4240 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4241 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4242 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4243 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4244
4245 if (rdev->num_crtc >= 4) {
4246 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4247 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4248 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4249 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4250 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4251 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4252 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4253 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4254 }
4255
4256 if (rdev->num_crtc >= 6) {
4257 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4258 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4259 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4260 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4261 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4262 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4263 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4264 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4265 }
4266
4267 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4268 tmp = RREG32(DC_HPD1_INT_CONTROL);
4269 tmp |= DC_HPDx_INT_ACK;
4270 WREG32(DC_HPD1_INT_CONTROL, tmp);
4271 }
4272 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4273 tmp = RREG32(DC_HPD2_INT_CONTROL);
4274 tmp |= DC_HPDx_INT_ACK;
4275 WREG32(DC_HPD2_INT_CONTROL, tmp);
4276 }
4277 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4278 tmp = RREG32(DC_HPD3_INT_CONTROL);
4279 tmp |= DC_HPDx_INT_ACK;
4280 WREG32(DC_HPD3_INT_CONTROL, tmp);
4281 }
4282 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4283 tmp = RREG32(DC_HPD4_INT_CONTROL);
4284 tmp |= DC_HPDx_INT_ACK;
4285 WREG32(DC_HPD4_INT_CONTROL, tmp);
4286 }
4287 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4288 tmp = RREG32(DC_HPD5_INT_CONTROL);
4289 tmp |= DC_HPDx_INT_ACK;
4290 WREG32(DC_HPD5_INT_CONTROL, tmp);
4291 }
4292 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4293		tmp = RREG32(DC_HPD6_INT_CONTROL);
4294 tmp |= DC_HPDx_INT_ACK;
4295 WREG32(DC_HPD6_INT_CONTROL, tmp);
4296 }
4297}
4298
4299/**
4300 * cik_irq_disable - disable interrupts
4301 *
4302 * @rdev: radeon_device pointer
4303 *
4304 * Disable interrupts on the hw (CIK).
4305 */
4306static void cik_irq_disable(struct radeon_device *rdev)
4307{
4308 cik_disable_interrupts(rdev);
4309 /* Wait and acknowledge irq */
4310 mdelay(1);
4311 cik_irq_ack(rdev);
4312 cik_disable_interrupt_state(rdev);
4313}
4314
4315/**
4316 * cik_irq_suspend - disable interrupts for suspend
4317 *
4318 * @rdev: radeon_device pointer
4319 *
4320 * Disable interrupts and stop the RLC (CIK).
4321 * Used for suspend.
4322 */
4323static void cik_irq_suspend(struct radeon_device *rdev)
4324{
4325 cik_irq_disable(rdev);
4326 cik_rlc_stop(rdev);
4327}
4328
4329/**
4330 * cik_irq_fini - tear down interrupt support
4331 *
4332 * @rdev: radeon_device pointer
4333 *
4334 * Disable interrupts on the hw and free the IH ring
4335 * buffer (CIK).
4336 * Used for driver unload.
4337 */
4338static void cik_irq_fini(struct radeon_device *rdev)
4339{
4340 cik_irq_suspend(rdev);
4341 r600_ih_ring_fini(rdev);
4342}
4343
4344/**
4345 * cik_get_ih_wptr - get the IH ring buffer wptr
4346 *
4347 * @rdev: radeon_device pointer
4348 *
4349 * Get the IH ring buffer wptr from either the register
4350 * or the writeback memory buffer (CIK). Also check for
4351 * ring buffer overflow and deal with it.
4352 * Used by cik_irq_process().
4353 * Returns the value of the wptr.
4354 */
4355static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
4356{
4357 u32 wptr, tmp;
4358
4359 if (rdev->wb.enabled)
4360 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4361 else
4362 wptr = RREG32(IH_RB_WPTR);
4363
4364 if (wptr & RB_OVERFLOW) {
4365		/* When a ring buffer overflow happens, start parsing interrupts
4366		 * from the last vector that was not overwritten (wptr + 16).
4367		 * Hopefully this should allow us to catch up.
4368 */
4369 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4370			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
4371 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4372 tmp = RREG32(IH_RB_CNTL);
4373 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4374 WREG32(IH_RB_CNTL, tmp);
4375 }
4376 return (wptr & rdev->ih.ptr_mask);
4377}
4378
4379/* CIK IV Ring
4380 * Each IV ring entry is 128 bits:
4381 * [7:0] - interrupt source id
4382 * [31:8] - reserved
4383 * [59:32] - interrupt source data
4384 * [63:60] - reserved
4385 * [71:64] - RINGID
4386 * CP:
4387 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
4388 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
4389 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
4390 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
4391 * PIPE_ID - ME0 0=3D
4392 * - ME1&2 compute dispatcher (4 pipes each)
4393 * SDMA:
4394 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
4395 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
4396 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
4397 * [79:72] - VMID
4398 * [95:80] - PASID
4399 * [127:96] - reserved
4400 */
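/* Standalone sketch of decoding one 128-bit IV entry with the layout
 * documented above (four little-endian dwords); illustrative only, the
 * struct and helper below are hypothetical and not part of the driver.
 */
#include <stdint.h>

struct cik_iv_entry_sketch {
	uint8_t  src_id;	/* [7:0] */
	uint32_t src_data;	/* [59:32] (28 bits) */
	uint8_t  ring_id;	/* [71:64] */
	uint8_t  vmid;		/* [79:72] */
	uint16_t pasid;		/* [95:80] */
};

static struct cik_iv_entry_sketch cik_decode_iv_sketch(const uint32_t dw[4])
{
	struct cik_iv_entry_sketch e;

	e.src_id   = dw[0] & 0xff;
	e.src_data = dw[1] & 0x0fffffff;
	e.ring_id  = dw[2] & 0xff;
	e.vmid     = (dw[2] >> 8) & 0xff;
	e.pasid    = (dw[2] >> 16) & 0xffff;
	return e;	/* dw[3] ([127:96]) is reserved */
}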
4401/**
4402 * cik_irq_process - interrupt handler
4403 *
4404 * @rdev: radeon_device pointer
4405 *
4406 * Interrupt handler (CIK). Walk the IH ring,
4407 * ack interrupts and schedule work to handle
4408 * interrupt events.
4409 * Returns irq process return code.
4410 */
4411int cik_irq_process(struct radeon_device *rdev)
4412{
4413 u32 wptr;
4414 u32 rptr;
4415 u32 src_id, src_data, ring_id;
4416 u8 me_id, pipe_id, queue_id;
4417 u32 ring_index;
4418 bool queue_hotplug = false;
4419 bool queue_reset = false;
4420
4421 if (!rdev->ih.enabled || rdev->shutdown)
4422 return IRQ_NONE;
4423
4424 wptr = cik_get_ih_wptr(rdev);
4425
4426restart_ih:
4427 /* is somebody else already processing irqs? */
4428 if (atomic_xchg(&rdev->ih.lock, 1))
4429 return IRQ_NONE;
4430
4431 rptr = rdev->ih.rptr;
4432 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4433
4434 /* Order reading of wptr vs. reading of IH ring data */
4435 rmb();
4436
4437 /* display interrupts */
4438 cik_irq_ack(rdev);
4439
4440 while (rptr != wptr) {
4441 /* wptr/rptr are in bytes! */
4442 ring_index = rptr / 4;
4443 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4444 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4445 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
4446
4447 switch (src_id) {
4448 case 1: /* D1 vblank/vline */
4449 switch (src_data) {
4450 case 0: /* D1 vblank */
4451 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
4452 if (rdev->irq.crtc_vblank_int[0]) {
4453 drm_handle_vblank(rdev->ddev, 0);
4454 rdev->pm.vblank_sync = true;
4455 wake_up(&rdev->irq.vblank_queue);
4456 }
4457 if (atomic_read(&rdev->irq.pflip[0]))
4458 radeon_crtc_handle_flip(rdev, 0);
4459 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4460 DRM_DEBUG("IH: D1 vblank\n");
4461 }
4462 break;
4463 case 1: /* D1 vline */
4464 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
4465 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4466 DRM_DEBUG("IH: D1 vline\n");
4467 }
4468 break;
4469 default:
4470 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4471 break;
4472 }
4473 break;
4474 case 2: /* D2 vblank/vline */
4475 switch (src_data) {
4476 case 0: /* D2 vblank */
4477 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4478 if (rdev->irq.crtc_vblank_int[1]) {
4479 drm_handle_vblank(rdev->ddev, 1);
4480 rdev->pm.vblank_sync = true;
4481 wake_up(&rdev->irq.vblank_queue);
4482 }
4483 if (atomic_read(&rdev->irq.pflip[1]))
4484 radeon_crtc_handle_flip(rdev, 1);
4485 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4486 DRM_DEBUG("IH: D2 vblank\n");
4487 }
4488 break;
4489 case 1: /* D2 vline */
4490 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4491 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4492 DRM_DEBUG("IH: D2 vline\n");
4493 }
4494 break;
4495 default:
4496 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4497 break;
4498 }
4499 break;
4500 case 3: /* D3 vblank/vline */
4501 switch (src_data) {
4502 case 0: /* D3 vblank */
4503 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4504 if (rdev->irq.crtc_vblank_int[2]) {
4505 drm_handle_vblank(rdev->ddev, 2);
4506 rdev->pm.vblank_sync = true;
4507 wake_up(&rdev->irq.vblank_queue);
4508 }
4509 if (atomic_read(&rdev->irq.pflip[2]))
4510 radeon_crtc_handle_flip(rdev, 2);
4511 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4512 DRM_DEBUG("IH: D3 vblank\n");
4513 }
4514 break;
4515 case 1: /* D3 vline */
4516 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4517 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4518 DRM_DEBUG("IH: D3 vline\n");
4519 }
4520 break;
4521 default:
4522 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4523 break;
4524 }
4525 break;
4526 case 4: /* D4 vblank/vline */
4527 switch (src_data) {
4528 case 0: /* D4 vblank */
4529 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4530 if (rdev->irq.crtc_vblank_int[3]) {
4531 drm_handle_vblank(rdev->ddev, 3);
4532 rdev->pm.vblank_sync = true;
4533 wake_up(&rdev->irq.vblank_queue);
4534 }
4535 if (atomic_read(&rdev->irq.pflip[3]))
4536 radeon_crtc_handle_flip(rdev, 3);
4537 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4538 DRM_DEBUG("IH: D4 vblank\n");
4539 }
4540 break;
4541 case 1: /* D4 vline */
4542 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4543 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4544 DRM_DEBUG("IH: D4 vline\n");
4545 }
4546 break;
4547 default:
4548 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4549 break;
4550 }
4551 break;
4552 case 5: /* D5 vblank/vline */
4553 switch (src_data) {
4554 case 0: /* D5 vblank */
4555 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4556 if (rdev->irq.crtc_vblank_int[4]) {
4557 drm_handle_vblank(rdev->ddev, 4);
4558 rdev->pm.vblank_sync = true;
4559 wake_up(&rdev->irq.vblank_queue);
4560 }
4561 if (atomic_read(&rdev->irq.pflip[4]))
4562 radeon_crtc_handle_flip(rdev, 4);
4563 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4564 DRM_DEBUG("IH: D5 vblank\n");
4565 }
4566 break;
4567 case 1: /* D5 vline */
4568 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4569 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4570 DRM_DEBUG("IH: D5 vline\n");
4571 }
4572 break;
4573 default:
4574 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4575 break;
4576 }
4577 break;
4578 case 6: /* D6 vblank/vline */
4579 switch (src_data) {
4580 case 0: /* D6 vblank */
4581 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4582 if (rdev->irq.crtc_vblank_int[5]) {
4583 drm_handle_vblank(rdev->ddev, 5);
4584 rdev->pm.vblank_sync = true;
4585 wake_up(&rdev->irq.vblank_queue);
4586 }
4587 if (atomic_read(&rdev->irq.pflip[5]))
4588 radeon_crtc_handle_flip(rdev, 5);
4589 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4590 DRM_DEBUG("IH: D6 vblank\n");
4591 }
4592 break;
4593 case 1: /* D6 vline */
4594 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4595 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4596 DRM_DEBUG("IH: D6 vline\n");
4597 }
4598 break;
4599 default:
4600 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4601 break;
4602 }
4603 break;
4604 case 42: /* HPD hotplug */
4605 switch (src_data) {
4606 case 0:
4607 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4608 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
4609 queue_hotplug = true;
4610 DRM_DEBUG("IH: HPD1\n");
4611 }
4612 break;
4613 case 1:
4614 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4615 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4616 queue_hotplug = true;
4617 DRM_DEBUG("IH: HPD2\n");
4618 }
4619 break;
4620 case 2:
4621 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4622 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4623 queue_hotplug = true;
4624 DRM_DEBUG("IH: HPD3\n");
4625 }
4626 break;
4627 case 3:
4628 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4629 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4630 queue_hotplug = true;
4631 DRM_DEBUG("IH: HPD4\n");
4632 }
4633 break;
4634 case 4:
4635 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4636 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4637 queue_hotplug = true;
4638 DRM_DEBUG("IH: HPD5\n");
4639 }
4640 break;
4641 case 5:
4642 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4643 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4644 queue_hotplug = true;
4645 DRM_DEBUG("IH: HPD6\n");
4646 }
4647 break;
4648 default:
4649 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4650 break;
4651 }
4652 break;
4653 case 146:
4654 case 147:
4655 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4656 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4657 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4658 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4659 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4660 /* reset addr and status */
4661 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4662 break;
4663 case 176: /* GFX RB CP_INT */
4664 case 177: /* GFX IB CP_INT */
4665 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4666 break;
4667 case 181: /* CP EOP event */
4668 DRM_DEBUG("IH: CP EOP\n");
4669 /* XXX check the bitfield order! */
4670 me_id = (ring_id & 0x60) >> 5;
4671 pipe_id = (ring_id & 0x18) >> 3;
4672 queue_id = (ring_id & 0x7) >> 0;
4673 switch (me_id) {
4674 case 0:
4675 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4676 break;
4677 case 1:
4678 /* XXX compute */
4679 break;
4680 case 2:
4681 /* XXX compute */
4682 break;
4683 }
4684 break;
4685 case 184: /* CP Privileged reg access */
4686 DRM_ERROR("Illegal register access in command stream\n");
4687 /* XXX check the bitfield order! */
4688 me_id = (ring_id & 0x60) >> 5;
4689 pipe_id = (ring_id & 0x18) >> 3;
4690 queue_id = (ring_id & 0x7) >> 0;
4691 switch (me_id) {
4692 case 0:
4693 /* This results in a full GPU reset, but all we need to do is soft
4694 * reset the CP for gfx
4695 */
4696 queue_reset = true;
4697 break;
4698 case 1:
4699 /* XXX compute */
4700 break;
4701 case 2:
4702 /* XXX compute */
4703 break;
4704 }
4705 break;
4706 case 185: /* CP Privileged inst */
4707 DRM_ERROR("Illegal instruction in command stream\n");
4708 /* XXX check the bitfield order! */
4709 me_id = (ring_id & 0x60) >> 5;
4710 pipe_id = (ring_id & 0x18) >> 3;
4711 queue_id = (ring_id & 0x7) >> 0;
4712 switch (me_id) {
4713 case 0:
4714 /* This results in a full GPU reset, but all we need to do is soft
4715 * reset the CP for gfx
4716 */
4717 queue_reset = true;
4718 break;
4719 case 1:
4720 /* XXX compute */
4721 break;
4722 case 2:
4723 /* XXX compute */
4724 break;
4725 }
4726 break;
4727 case 224: /* SDMA trap event */
4728 /* XXX check the bitfield order! */
4729 me_id = (ring_id & 0x3) >> 0;
4730 queue_id = (ring_id & 0xc) >> 2;
4731 DRM_DEBUG("IH: SDMA trap\n");
4732 switch (me_id) {
4733 case 0:
4734 switch (queue_id) {
4735 case 0:
4736 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4737 break;
4738 case 1:
4739 /* XXX compute */
4740 break;
4741 case 2:
4742 /* XXX compute */
4743 break;
4744 }
4745 break;
4746 case 1:
4747 switch (queue_id) {
4748 case 0:
4749 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4750 break;
4751 case 1:
4752 /* XXX compute */
4753 break;
4754 case 2:
4755 /* XXX compute */
4756 break;
4757 }
4758 break;
4759 }
4760 break;
4761 case 241: /* SDMA Privileged inst */
4762 case 247: /* SDMA Privileged inst */
4763 DRM_ERROR("Illegal instruction in SDMA command stream\n");
4764 /* XXX check the bitfield order! */
4765 me_id = (ring_id & 0x3) >> 0;
4766 queue_id = (ring_id & 0xc) >> 2;
4767 switch (me_id) {
4768 case 0:
4769 switch (queue_id) {
4770 case 0:
4771 queue_reset = true;
4772 break;
4773 case 1:
4774 /* XXX compute */
4775 queue_reset = true;
4776 break;
4777 case 2:
4778 /* XXX compute */
4779 queue_reset = true;
4780 break;
4781 }
4782 break;
4783 case 1:
4784 switch (queue_id) {
4785 case 0:
4786 queue_reset = true;
4787 break;
4788 case 1:
4789 /* XXX compute */
4790 queue_reset = true;
4791 break;
4792 case 2:
4793 /* XXX compute */
4794 queue_reset = true;
4795 break;
4796 }
4797 break;
4798 }
4799 break;
4800 case 233: /* GUI IDLE */
4801 DRM_DEBUG("IH: GUI idle\n");
4802 break;
4803 default:
4804 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4805 break;
4806 }
4807
4808 /* wptr/rptr are in bytes! */
4809 rptr += 16;
4810 rptr &= rdev->ih.ptr_mask;
4811 }
4812 if (queue_hotplug)
4813 schedule_work(&rdev->hotplug_work);
4814 if (queue_reset)
4815 schedule_work(&rdev->reset_work);
4816 rdev->ih.rptr = rptr;
4817 WREG32(IH_RB_RPTR, rdev->ih.rptr);
4818 atomic_set(&rdev->ih.lock, 0);
4819
4820 /* make sure wptr hasn't changed while processing */
4821 wptr = cik_get_ih_wptr(rdev);
4822 if (wptr != rptr)
4823 goto restart_ih;
4824
4825 return IRQ_HANDLED;
4826}
4827
4828/*
4829 * startup/shutdown callbacks
4830 */
4831/**
4832 * cik_startup - program the asic to a functional state
4833 *
4834 * @rdev: radeon_device pointer
4835 *
4836 * Programs the asic to a functional state (CIK).
4837 * Called by cik_init() and cik_resume().
4838 * Returns 0 for success, error for failure.
4839 */
4840static int cik_startup(struct radeon_device *rdev)
4841{
4842 struct radeon_ring *ring;
4843 int r;
4844
4845 if (rdev->flags & RADEON_IS_IGP) {
4846 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4847 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
4848 r = cik_init_microcode(rdev);
4849 if (r) {
4850 DRM_ERROR("Failed to load firmware!\n");
4851 return r;
4852 }
4853 }
4854 } else {
4855 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4856 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
4857 !rdev->mc_fw) {
4858 r = cik_init_microcode(rdev);
4859 if (r) {
4860 DRM_ERROR("Failed to load firmware!\n");
4861 return r;
4862 }
4863 }
4864
4865 r = ci_mc_load_microcode(rdev);
4866 if (r) {
4867 DRM_ERROR("Failed to load MC firmware!\n");
4868 return r;
4869 }
4870 }
4871
4872 r = r600_vram_scratch_init(rdev);
4873 if (r)
4874 return r;
4875
4876 cik_mc_program(rdev);
4877 r = cik_pcie_gart_enable(rdev);
4878 if (r)
4879 return r;
4880 cik_gpu_init(rdev);
4881
4882 /* allocate rlc buffers */
4883 r = si_rlc_init(rdev);
4884 if (r) {
4885 DRM_ERROR("Failed to init rlc BOs!\n");
4886 return r;
4887 }
4888
4889 /* allocate wb buffer */
4890 r = radeon_wb_init(rdev);
4891 if (r)
4892 return r;
4893
4894 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4895 if (r) {
4896 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4897 return r;
4898 }
4899
4900 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4901 if (r) {
4902 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4903 return r;
4904 }
4905
4906 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4907 if (r) {
4908 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4909 return r;
4910 }
4911
4912 r = cik_uvd_resume(rdev);
4913 if (!r) {
4914 r = radeon_fence_driver_start_ring(rdev,
4915 R600_RING_TYPE_UVD_INDEX);
4916 if (r)
4917 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
4918 }
4919 if (r)
4920 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
4921
4922 /* Enable IRQ */
4923 if (!rdev->irq.installed) {
4924 r = radeon_irq_kms_init(rdev);
4925 if (r)
4926 return r;
4927 }
4928
4929 r = cik_irq_init(rdev);
4930 if (r) {
4931 DRM_ERROR("radeon: IH init failed (%d).\n", r);
4932 radeon_irq_kms_fini(rdev);
4933 return r;
4934 }
4935 cik_irq_set(rdev);
4936
4937 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4938 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
4939 CP_RB0_RPTR, CP_RB0_WPTR,
4940 0, 0xfffff, RADEON_CP_PACKET2);
4941 if (r)
4942 return r;
4943
4944 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4945 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4946 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
4947 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
4948 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4949 if (r)
4950 return r;
4951
4952 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4953 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4954 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
4955 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
4956 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4957 if (r)
4958 return r;
4959
4960 r = cik_cp_resume(rdev);
4961 if (r)
4962 return r;
4963
4964 r = cik_sdma_resume(rdev);
4965 if (r)
4966 return r;
4967
4968 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
4969 if (ring->ring_size) {
4970 r = radeon_ring_init(rdev, ring, ring->ring_size,
4971 R600_WB_UVD_RPTR_OFFSET,
4972 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
4973 0, 0xfffff, RADEON_CP_PACKET2);
4974 if (!r)
4975 r = r600_uvd_init(rdev);
4976 if (r)
4977 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
4978 }
4979
4980 r = radeon_ib_pool_init(rdev);
4981 if (r) {
4982 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
4983 return r;
4984 }
4985
4986 r = radeon_vm_manager_init(rdev);
4987 if (r) {
4988 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
4989 return r;
4990 }
4991
4992 return 0;
4993}
4994
4995/**
4996 * cik_resume - resume the asic to a functional state
4997 *
4998 * @rdev: radeon_device pointer
4999 *
5000 * Programs the asic to a functional state (CIK).
5001 * Called at resume.
5002 * Returns 0 for success, error for failure.
5003 */
5004int cik_resume(struct radeon_device *rdev)
5005{
5006 int r;
5007
5008 /* post card */
5009 atom_asic_init(rdev->mode_info.atom_context);
5010
5011 rdev->accel_working = true;
5012 r = cik_startup(rdev);
5013 if (r) {
5014 DRM_ERROR("cik startup failed on resume\n");
5015 rdev->accel_working = false;
5016 return r;
5017 }
5018
5019 return r;
5020
5021}
5022
5023/**
5024 * cik_suspend - suspend the asic
5025 *
5026 * @rdev: radeon_device pointer
5027 *
5028 * Bring the chip into a state suitable for suspend (CIK).
5029 * Called at suspend.
5030 * Returns 0 for success.
5031 */
5032int cik_suspend(struct radeon_device *rdev)
5033{
5034 radeon_vm_manager_fini(rdev);
5035 cik_cp_enable(rdev, false);
5036 cik_sdma_enable(rdev, false);
5037 r600_uvd_rbc_stop(rdev);
5038 radeon_uvd_suspend(rdev);
5039 cik_irq_suspend(rdev);
5040 radeon_wb_disable(rdev);
5041 cik_pcie_gart_disable(rdev);
5042 return 0;
5043}
5044
5045 /* The plan is to move initialization into this function and use
5046 * helper functions so that radeon_device_init does pretty much
5047 * nothing more than call asic specific functions. This should
5048 * also allow us to remove a bunch of callback functions
5049 * like vram_info.
5050 */
5051/**
5052 * cik_init - asic specific driver and hw init
5053 *
5054 * @rdev: radeon_device pointer
5055 *
5056 * Setup asic specific driver variables and program the hw
5057 * to a functional state (CIK).
5058 * Called at driver startup.
5059 * Returns 0 for success, errors for failure.
5060 */
5061int cik_init(struct radeon_device *rdev)
5062{
5063 struct radeon_ring *ring;
5064 int r;
5065
5066 /* Read BIOS */
5067 if (!radeon_get_bios(rdev)) {
5068 if (ASIC_IS_AVIVO(rdev))
5069 return -EINVAL;
5070 }
5071 /* Must be an ATOMBIOS */
5072 if (!rdev->is_atom_bios) {
5073		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
5074 return -EINVAL;
5075 }
5076 r = radeon_atombios_init(rdev);
5077 if (r)
5078 return r;
5079
5080 /* Post card if necessary */
5081 if (!radeon_card_posted(rdev)) {
5082 if (!rdev->bios) {
5083 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5084 return -EINVAL;
5085 }
5086 DRM_INFO("GPU not posted. posting now...\n");
5087 atom_asic_init(rdev->mode_info.atom_context);
5088 }
5089 /* Initialize scratch registers */
5090 cik_scratch_init(rdev);
5091 /* Initialize surface registers */
5092 radeon_surface_init(rdev);
5093 /* Initialize clocks */
5094 radeon_get_clock_info(rdev->ddev);
5095
5096 /* Fence driver */
5097 r = radeon_fence_driver_init(rdev);
5098 if (r)
5099 return r;
5100
5101 /* initialize memory controller */
5102 r = cik_mc_init(rdev);
5103 if (r)
5104 return r;
5105 /* Memory manager */
5106 r = radeon_bo_init(rdev);
5107 if (r)
5108 return r;
5109
5110 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5111 ring->ring_obj = NULL;
5112 r600_ring_init(rdev, ring, 1024 * 1024);
5113
5114 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5115 ring->ring_obj = NULL;
5116 r600_ring_init(rdev, ring, 256 * 1024);
5117
5118 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5119 ring->ring_obj = NULL;
5120 r600_ring_init(rdev, ring, 256 * 1024);
5121
5122 r = radeon_uvd_init(rdev);
5123 if (!r) {
5124 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5125 ring->ring_obj = NULL;
5126 r600_ring_init(rdev, ring, 4096);
5127 }
5128
5129 rdev->ih.ring_obj = NULL;
5130 r600_ih_ring_init(rdev, 64 * 1024);
5131
5132 r = r600_pcie_gart_init(rdev);
5133 if (r)
5134 return r;
5135
5136 rdev->accel_working = true;
5137 r = cik_startup(rdev);
5138 if (r) {
5139 dev_err(rdev->dev, "disabling GPU acceleration\n");
5140 cik_cp_fini(rdev);
5141 cik_sdma_fini(rdev);
5142 cik_irq_fini(rdev);
5143 si_rlc_fini(rdev);
5144 radeon_wb_fini(rdev);
5145 radeon_ib_pool_fini(rdev);
5146 radeon_vm_manager_fini(rdev);
5147 radeon_irq_kms_fini(rdev);
5148 cik_pcie_gart_fini(rdev);
5149 rdev->accel_working = false;
5150 }
5151
5152 /* Don't start up if the MC ucode is missing.
5153 * The default clocks and voltages before the MC ucode
5154	 * is loaded are not sufficient for advanced operations.
5155 */
5156 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
5157 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5158 return -EINVAL;
5159 }
5160
5161 return 0;
5162}
5163
5164/**
5165 * cik_fini - asic specific driver and hw fini
5166 *
5167 * @rdev: radeon_device pointer
5168 *
5169 * Tear down the asic specific driver variables and program the hw
5170 * to an idle state (CIK).
5171 * Called at driver unload.
5172 */
5173void cik_fini(struct radeon_device *rdev)
5174{
5175 cik_cp_fini(rdev);
5176 cik_sdma_fini(rdev);
5177 cik_irq_fini(rdev);
5178 si_rlc_fini(rdev);
5179 radeon_wb_fini(rdev);
5180 radeon_vm_manager_fini(rdev);
5181 radeon_ib_pool_fini(rdev);
5182 radeon_irq_kms_fini(rdev);
5183	radeon_uvd_fini(rdev);
5184 cik_pcie_gart_fini(rdev);
5185 r600_vram_scratch_fini(rdev);
5186 radeon_gem_fini(rdev);
5187 radeon_fence_driver_fini(rdev);
5188 radeon_bo_fini(rdev);
5189 radeon_atombios_fini(rdev);
5190 kfree(rdev->bios);
5191 rdev->bios = NULL;
5192}
5193
5194/* display watermark setup */
5195/**
5196 * dce8_line_buffer_adjust - Set up the line buffer
5197 *
5198 * @rdev: radeon_device pointer
5199 * @radeon_crtc: the selected display controller
5200 * @mode: the current display mode on the selected display
5201 * controller
5202 *
5203 * Setup up the line buffer allocation for
5204 * the selected display controller (CIK).
5205 * Returns the line buffer size in pixels.
5206 */
5207static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5208 struct radeon_crtc *radeon_crtc,
5209 struct drm_display_mode *mode)
5210{
5211 u32 tmp;
5212
5213 /*
5214 * Line Buffer Setup
5215	 * There are 6 line buffers, one for each display controller.
5216 * There are 3 partitions per LB. Select the number of partitions
5217 * to enable based on the display width. For display widths larger
5218	 * than 4096, you need to use 2 display controllers and combine
5219 * them using the stereo blender.
5220 */
5221 if (radeon_crtc->base.enabled && mode) {
5222 if (mode->crtc_hdisplay < 1920)
5223 tmp = 1;
5224 else if (mode->crtc_hdisplay < 2560)
5225 tmp = 2;
5226 else if (mode->crtc_hdisplay < 4096)
5227 tmp = 0;
5228 else {
5229 DRM_DEBUG_KMS("Mode too big for LB!\n");
5230 tmp = 0;
5231 }
5232 } else
5233 tmp = 1;
5234
5235 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5236 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5237
5238 if (radeon_crtc->base.enabled && mode) {
5239 switch (tmp) {
5240 case 0:
5241 default:
5242 return 4096 * 2;
5243 case 1:
5244 return 1920 * 2;
5245 case 2:
5246 return 2560 * 2;
5247 }
5248 }
5249
5250 /* controller not enabled, so no lb used */
5251 return 0;
5252}
5253
5254/**
5255 * cik_get_number_of_dram_channels - get the number of dram channels
5256 *
5257 * @rdev: radeon_device pointer
5258 *
5259 * Look up the number of video ram channels (CIK).
5260 * Used for display watermark bandwidth calculations
5261 * Returns the number of dram channels
5262 */
5263static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5264{
5265 u32 tmp = RREG32(MC_SHARED_CHMAP);
5266
5267 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5268 case 0:
5269 default:
5270 return 1;
5271 case 1:
5272 return 2;
5273 case 2:
5274 return 4;
5275 case 3:
5276 return 8;
5277 case 4:
5278 return 3;
5279 case 5:
5280 return 6;
5281 case 6:
5282 return 10;
5283 case 7:
5284 return 12;
5285 case 8:
5286 return 16;
5287 }
5288}
5289
5290struct dce8_wm_params {
5291 u32 dram_channels; /* number of dram channels */
5292 u32 yclk; /* bandwidth per dram data pin in kHz */
5293 u32 sclk; /* engine clock in kHz */
5294 u32 disp_clk; /* display clock in kHz */
5295 u32 src_width; /* viewport width */
5296 u32 active_time; /* active display time in ns */
5297 u32 blank_time; /* blank time in ns */
5298 bool interlaced; /* mode is interlaced */
5299 fixed20_12 vsc; /* vertical scale ratio */
5300 u32 num_heads; /* number of active crtcs */
5301 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
5302 u32 lb_size; /* line buffer allocated to pipe */
5303 u32 vtaps; /* vertical scaler taps */
5304};
5305
5306/**
5307 * dce8_dram_bandwidth - get the dram bandwidth
5308 *
5309 * @wm: watermark calculation data
5310 *
5311 * Calculate the raw dram bandwidth (CIK).
5312 * Used for display watermark bandwidth calculations
5313 * Returns the dram bandwidth in MBytes/s
5314 */
5315static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
5316{
5317 /* Calculate raw DRAM Bandwidth */
5318 fixed20_12 dram_efficiency; /* 0.7 */
5319 fixed20_12 yclk, dram_channels, bandwidth;
5320 fixed20_12 a;
5321
5322 a.full = dfixed_const(1000);
5323 yclk.full = dfixed_const(wm->yclk);
5324 yclk.full = dfixed_div(yclk, a);
5325 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5326 a.full = dfixed_const(10);
5327 dram_efficiency.full = dfixed_const(7);
5328 dram_efficiency.full = dfixed_div(dram_efficiency, a);
5329 bandwidth.full = dfixed_mul(dram_channels, yclk);
5330 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
5331
5332 return dfixed_trunc(bandwidth);
5333}
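/* Plain restatement of the fixed20_12 math above, for illustration only:
 * raw bandwidth ~= (yclk / 1000) * (dram_channels * 4 bytes) * 0.7.
 * The numbers in the example are made up, e.g. yclk = 1000000 kHz with
 * 2 channels gives 1000 * 8 * 0.7 = 5600 MB/s.
 */
static unsigned int dce8_dram_bandwidth_sketch(unsigned int yclk_khz,
					       unsigned int dram_channels)
{
	return (unsigned int)((yclk_khz / 1000.0) * (dram_channels * 4) * 0.7);
}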
5334
5335/**
5336 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
5337 *
5338 * @wm: watermark calculation data
5339 *
5340 * Calculate the dram bandwidth used for display (CIK).
5341 * Used for display watermark bandwidth calculations
5342 * Returns the dram bandwidth for display in MBytes/s
5343 */
5344static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5345{
5346 /* Calculate DRAM Bandwidth and the part allocated to display. */
5347 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
5348 fixed20_12 yclk, dram_channels, bandwidth;
5349 fixed20_12 a;
5350
5351 a.full = dfixed_const(1000);
5352 yclk.full = dfixed_const(wm->yclk);
5353 yclk.full = dfixed_div(yclk, a);
5354 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5355 a.full = dfixed_const(10);
5356	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
5357 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
5358 bandwidth.full = dfixed_mul(dram_channels, yclk);
5359 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
5360
5361 return dfixed_trunc(bandwidth);
5362}
5363
5364/**
5365 * dce8_data_return_bandwidth - get the data return bandwidth
5366 *
5367 * @wm: watermark calculation data
5368 *
5369 * Calculate the data return bandwidth used for display (CIK).
5370 * Used for display watermark bandwidth calculations
5371 * Returns the data return bandwidth in MBytes/s
5372 */
5373static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
5374{
5375 /* Calculate the display Data return Bandwidth */
5376 fixed20_12 return_efficiency; /* 0.8 */
5377 fixed20_12 sclk, bandwidth;
5378 fixed20_12 a;
5379
5380 a.full = dfixed_const(1000);
5381 sclk.full = dfixed_const(wm->sclk);
5382 sclk.full = dfixed_div(sclk, a);
5383 a.full = dfixed_const(10);
5384 return_efficiency.full = dfixed_const(8);
5385 return_efficiency.full = dfixed_div(return_efficiency, a);
5386 a.full = dfixed_const(32);
5387 bandwidth.full = dfixed_mul(a, sclk);
5388 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
5389
5390 return dfixed_trunc(bandwidth);
5391}
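/* Worked example with an illustrative engine clock: with sclk =
 * 800,000 kHz (800 MHz), the return path is treated as 32 bytes per sclk
 * cycle at 0.8 efficiency, i.e. 800 * 32 * 0.8 = 20480 MBytes/s.
 */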
5392
5393/**
5394 * dce8_dmif_request_bandwidth - get the dmif bandwidth
5395 *
5396 * @wm: watermark calculation data
5397 *
5398 * Calculate the dmif bandwidth used for display (CIK).
5399 * Used for display watermark bandwidth calculations
5400 * Returns the dmif bandwidth in MBytes/s
5401 */
5402static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
5403{
5404 /* Calculate the DMIF Request Bandwidth */
5405 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
5406 fixed20_12 disp_clk, bandwidth;
5407 fixed20_12 a, b;
5408
5409 a.full = dfixed_const(1000);
5410 disp_clk.full = dfixed_const(wm->disp_clk);
5411 disp_clk.full = dfixed_div(disp_clk, a);
5412 a.full = dfixed_const(32);
5413 b.full = dfixed_mul(a, disp_clk);
5414
5415 a.full = dfixed_const(10);
5416 disp_clk_request_efficiency.full = dfixed_const(8);
5417 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
5418
5419 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
5420
5421 return dfixed_trunc(bandwidth);
5422}
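/* Same shape of calculation, keyed to the display clock instead: an
 * illustrative disp_clk of 150,000 kHz (a 150 MHz pixel clock) gives
 * 150 * 32 * 0.8 = 3840 MBytes/s of DMIF request bandwidth.
 */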
5423
5424/**
5425 * dce8_available_bandwidth - get the min available bandwidth
5426 *
5427 * @wm: watermark calculation data
5428 *
5429 * Calculate the min available bandwidth used for display (CIK).
5430 * Used for display watermark bandwidth calculations
5431 * Returns the min available bandwidth in MBytes/s
5432 */
5433static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
5434{
5435	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
5436 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
5437 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
5438 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
5439
5440 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
5441}
5442
5443/**
5444 * dce8_average_bandwidth - get the average available bandwidth
5445 *
5446 * @wm: watermark calculation data
5447 *
5448 * Calculate the average available bandwidth used for display (CIK).
5449 * Used for display watermark bandwidth calculations
5450 * Returns the average available bandwidth in MBytes/s
5451 */
5452static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
5453{
5454 /* Calculate the display mode Average Bandwidth
5455 * DisplayMode should contain the source and destination dimensions,
5456 * timing, etc.
5457 */
5458 fixed20_12 bpp;
5459 fixed20_12 line_time;
5460 fixed20_12 src_width;
5461 fixed20_12 bandwidth;
5462 fixed20_12 a;
5463
5464 a.full = dfixed_const(1000);
5465 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
5466 line_time.full = dfixed_div(line_time, a);
5467 bpp.full = dfixed_const(wm->bytes_per_pixel);
5468 src_width.full = dfixed_const(wm->src_width);
5469 bandwidth.full = dfixed_mul(src_width, bpp);
5470 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
5471 bandwidth.full = dfixed_div(bandwidth, line_time);
5472
5473 return dfixed_trunc(bandwidth);
5474}
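/* Worked example with illustrative mode numbers: a 1920-pixel wide source
 * at 4 bytes per pixel, vsc = 1.0 and a 16,000 ns line time (active +
 * blank) averages 1920 * 4 / 16 us = 480 MBytes/s.
 */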
5475
5476/**
5477 * dce8_latency_watermark - get the latency watermark
5478 *
5479 * @wm: watermark calculation data
5480 *
5481 * Calculate the latency watermark (CIK).
5482 * Used for display watermark bandwidth calculations
5483 * Returns the latency watermark in ns
5484 */
5485static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
5486{
5487 /* First calculate the latency in ns */
5488 u32 mc_latency = 2000; /* 2000 ns. */
5489 u32 available_bandwidth = dce8_available_bandwidth(wm);
5490 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
5491 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
5492 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
5493 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
5494 (wm->num_heads * cursor_line_pair_return_time);
5495 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
5496 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
5497 u32 tmp, dmif_size = 12288;
5498 fixed20_12 a, b, c;
5499
5500 if (wm->num_heads == 0)
5501 return 0;
5502
5503 a.full = dfixed_const(2);
5504 b.full = dfixed_const(1);
5505 if ((wm->vsc.full > a.full) ||
5506 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
5507 (wm->vtaps >= 5) ||
5508 ((wm->vsc.full >= a.full) && wm->interlaced))
5509 max_src_lines_per_dst_line = 4;
5510 else
5511 max_src_lines_per_dst_line = 2;
5512
5513 a.full = dfixed_const(available_bandwidth);
5514 b.full = dfixed_const(wm->num_heads);
5515 a.full = dfixed_div(a, b);
5516
5517 b.full = dfixed_const(mc_latency + 512);
5518 c.full = dfixed_const(wm->disp_clk);
5519 b.full = dfixed_div(b, c);
5520
5521 c.full = dfixed_const(dmif_size);
5522 b.full = dfixed_div(c, b);
5523
5524 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
5525
5526 b.full = dfixed_const(1000);
5527 c.full = dfixed_const(wm->disp_clk);
5528 b.full = dfixed_div(c, b);
5529 c.full = dfixed_const(wm->bytes_per_pixel);
5530 b.full = dfixed_mul(b, c);
5531
5532 lb_fill_bw = min(tmp, dfixed_trunc(b));
5533
5534 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
5535 b.full = dfixed_const(1000);
5536 c.full = dfixed_const(lb_fill_bw);
5537 b.full = dfixed_div(c, b);
5538 a.full = dfixed_div(a, b);
5539 line_fill_time = dfixed_trunc(a);
5540
5541 if (line_fill_time < wm->active_time)
5542 return latency;
5543 else
5544 return latency + (line_fill_time - wm->active_time);
5545
5546}
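/* Reading of the calculation above (derived from the code, not from a
 * hardware spec): the watermark is the memory controller latency, plus
 * the time the other heads can hold the return path (sized here as one
 * 4096-byte chunk and one 512-byte cursor line pair each), plus the dc
 * pipe latency; if the line buffer fill rate cannot refill a source line
 * within the active display time, the shortfall is added on top.
 */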
5547
5548/**
5549 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check the
5550 * average bandwidth against the display dram bandwidth
5551 *
5552 * @wm: watermark calculation data
5553 *
5554 * Check if the display average bandwidth fits in the display
5555 * dram bandwidth (CIK).
5556 * Used for display watermark bandwidth calculations
5557 * Returns true if the display fits, false if not.
5558 */
5559static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5560{
5561 if (dce8_average_bandwidth(wm) <=
5562 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
5563 return true;
5564 else
5565 return false;
5566}
5567
5568/**
5569 * dce8_average_bandwidth_vs_available_bandwidth - check the
5570 * average bandwidth against the available bandwidth
5571 *
5572 * @wm: watermark calculation data
5573 *
5574 * Check if the display average bandwidth fits in the display
5575 * available bandwidth (CIK).
5576 * Used for display watermark bandwidth calculations
5577 * Returns true if the display fits, false if not.
5578 */
5579static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
5580{
5581 if (dce8_average_bandwidth(wm) <=
5582 (dce8_available_bandwidth(wm) / wm->num_heads))
5583 return true;
5584 else
5585 return false;
5586}
5587
5588/**
5589 * dce8_check_latency_hiding - check latency hiding
5590 *
5591 * @wm: watermark calculation data
5592 *
5593 * Check latency hiding (CIK).
5594 * Used for display watermark bandwidth calculations
5595 * Returns true if the display fits, false if not.
5596 */
5597static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
5598{
5599 u32 lb_partitions = wm->lb_size / wm->src_width;
5600 u32 line_time = wm->active_time + wm->blank_time;
5601 u32 latency_tolerant_lines;
5602 u32 latency_hiding;
5603 fixed20_12 a;
5604
5605 a.full = dfixed_const(1);
5606 if (wm->vsc.full > a.full)
5607 latency_tolerant_lines = 1;
5608 else {
5609 if (lb_partitions <= (wm->vtaps + 1))
5610 latency_tolerant_lines = 1;
5611 else
5612 latency_tolerant_lines = 2;
5613 }
5614
5615 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
5616
5617 if (dce8_latency_watermark(wm) <= latency_hiding)
5618 return true;
5619 else
5620 return false;
5621}
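/* Example of the check above with illustrative numbers: with one latency
 * tolerant line, a 16,000 ns line time and a 2,000 ns blank time the
 * display can hide 1 * 16000 + 2000 = 18,000 ns of latency, so any
 * watermark at or below that value passes.
 */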
5622
5623/**
5624 * dce8_program_watermarks - program display watermarks
5625 *
5626 * @rdev: radeon_device pointer
5627 * @radeon_crtc: the selected display controller
5628 * @lb_size: line buffer size
5629 * @num_heads: number of display controllers in use
5630 *
5631 * Calculate and program the display watermarks for the
5632 * selected display controller (CIK).
5633 */
5634static void dce8_program_watermarks(struct radeon_device *rdev,
5635 struct radeon_crtc *radeon_crtc,
5636 u32 lb_size, u32 num_heads)
5637{
5638 struct drm_display_mode *mode = &radeon_crtc->base.mode;
5639 struct dce8_wm_params wm;
5640 u32 pixel_period;
5641 u32 line_time = 0;
5642 u32 latency_watermark_a = 0, latency_watermark_b = 0;
5643 u32 tmp, wm_mask;
5644
5645 if (radeon_crtc->base.enabled && num_heads && mode) {
5646 pixel_period = 1000000 / (u32)mode->clock;
5647 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
5648
5649 wm.yclk = rdev->pm.current_mclk * 10;
5650 wm.sclk = rdev->pm.current_sclk * 10;
5651 wm.disp_clk = mode->clock;
5652 wm.src_width = mode->crtc_hdisplay;
5653 wm.active_time = mode->crtc_hdisplay * pixel_period;
5654 wm.blank_time = line_time - wm.active_time;
5655 wm.interlaced = false;
5656 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
5657 wm.interlaced = true;
5658 wm.vsc = radeon_crtc->vsc;
5659 wm.vtaps = 1;
5660 if (radeon_crtc->rmx_type != RMX_OFF)
5661 wm.vtaps = 2;
5662 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
5663 wm.lb_size = lb_size;
5664 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
5665 wm.num_heads = num_heads;
5666
5667 /* set for high clocks */
5668 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
5669 /* set for low clocks */
5670 /* wm.yclk = low clk; wm.sclk = low clk */
5671 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
5672
5673 /* possibly force display priority to high */
5674 /* should really do this at mode validation time... */
5675 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
5676 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
5677 !dce8_check_latency_hiding(&wm) ||
5678 (rdev->disp_priority == 2)) {
5679 DRM_DEBUG_KMS("force priority to high\n");
5680 }
5681 }
5682
5683 /* select wm A */
5684 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5685 tmp = wm_mask;
5686 tmp &= ~LATENCY_WATERMARK_MASK(3);
5687 tmp |= LATENCY_WATERMARK_MASK(1);
5688 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5689 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5690 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
5691 LATENCY_HIGH_WATERMARK(line_time)));
5692 /* select wm B */
5693 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5694 tmp &= ~LATENCY_WATERMARK_MASK(3);
5695 tmp |= LATENCY_WATERMARK_MASK(2);
5696 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5697 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5698 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
5699 LATENCY_HIGH_WATERMARK(line_time)));
5700 /* restore original selection */
5701 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
5702}
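/* Note on the register sequence above: DPG_WATERMARK_MASK_CONTROL selects
 * which watermark set (A or B) the following DPG_PIPE_LATENCY_CONTROL
 * write programs; the original mask value is saved first and restored
 * once both sets have been written.
 */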
5703
5704/**
5705 * dce8_bandwidth_update - program display watermarks
5706 *
5707 * @rdev: radeon_device pointer
5708 *
5709 * Calculate and program the display watermarks and line
5710 * buffer allocation (CIK).
5711 */
5712void dce8_bandwidth_update(struct radeon_device *rdev)
5713{
5714 struct drm_display_mode *mode = NULL;
5715 u32 num_heads = 0, lb_size;
5716 int i;
5717
5718 radeon_update_display_priority(rdev);
5719
5720 for (i = 0; i < rdev->num_crtc; i++) {
5721 if (rdev->mode_info.crtcs[i]->base.enabled)
5722 num_heads++;
5723 }
5724 for (i = 0; i < rdev->num_crtc; i++) {
5725 mode = &rdev->mode_info.crtcs[i]->base.mode;
5726 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
5727 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
5728 }
5729}
5730
5731/**
5732 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
5733 *
5734 * @rdev: radeon_device pointer
5735 *
5736 * Fetches a GPU clock counter snapshot (CIK).
5737 * Returns the 64 bit clock counter snapshot.
5738 */
5739uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
5740{
5741 uint64_t clock;
5742
5743 mutex_lock(&rdev->gpu_clock_mutex);
5744 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5745 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5746 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5747 mutex_unlock(&rdev->gpu_clock_mutex);
5748 return clock;
5749}
5750
5751static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
5752 u32 cntl_reg, u32 status_reg)
5753{
5754 int r, i;
5755 struct atom_clock_dividers dividers;
5756 uint32_t tmp;
5757
5758 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
5759 clock, false, &dividers);
5760 if (r)
5761 return r;
5762
5763 tmp = RREG32_SMC(cntl_reg);
5764 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
5765 tmp |= dividers.post_divider;
5766 WREG32_SMC(cntl_reg, tmp);
5767
5768 for (i = 0; i < 100; i++) {
5769 if (RREG32_SMC(status_reg) & DCLK_STATUS)
5770 break;
5771 mdelay(10);
5772 }
5773 if (i == 100)
5774 return -ETIMEDOUT;
5775
5776 return 0;
5777}
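/* The poll above gives the divider up to roughly one second
 * (100 iterations of mdelay(10)) to report DCLK_STATUS before the helper
 * gives up with -ETIMEDOUT.
 */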
5778
5779int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
5780{
5781 int r = 0;
5782
5783 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
5784 if (r)
5785 return r;
5786
5787 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
5788 return r;
5789}
5790
5791int cik_uvd_resume(struct radeon_device *rdev)
5792{
5793 uint64_t addr;
5794 uint32_t size;
5795 int r;
5796
5797 r = radeon_uvd_resume(rdev);
5798 if (r)
5799 return r;
5800
5801	/* program the VCPU memory controller bits 0-27 */
5802 addr = rdev->uvd.gpu_addr >> 3;
5803 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
5804 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
5805 WREG32(UVD_VCPU_CACHE_SIZE0, size);
5806
5807 addr += size;
5808 size = RADEON_UVD_STACK_SIZE >> 3;
5809 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
5810 WREG32(UVD_VCPU_CACHE_SIZE1, size);
5811
5812 addr += size;
5813 size = RADEON_UVD_HEAP_SIZE >> 3;
5814 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
5815 WREG32(UVD_VCPU_CACHE_SIZE2, size);
5816
5817 /* bits 28-31 */
5818 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
5819 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
5820
5821 /* bits 32-39 */
5822 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
5823 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
5824
5825 return 0;
5826}
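/* Layout programmed above (offsets and sizes are written in 8-byte units,
 * hence the >> 3): cache region 0 holds the UVD firmware image, region 1
 * the stack and region 2 the heap, laid out back to back from the base of
 * the UVD buffer; the LMI extension registers carry address bits 28-31
 * and 32-39 of that base.
 */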