drm/radeon/kms: add support for interrupts on SI
drivers/gpu/drm/radeon/si.c
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_drm.h"
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"

#define SI_PFP_UCODE_SIZE 2144
#define SI_PM4_UCODE_SIZE 2144
#define SI_CE_UCODE_SIZE 2144
#define SI_RLC_UCODE_SIZE 2048
#define SI_MC_UCODE_SIZE 7769

MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}
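/*
 * Illustrative decode of the above: CTF_TEMP is kept as a 9-bit value
 * plus an 0x200 overflow flag, so a raw field of 0x4b reads back as 75
 * and is reported as 75 * 1000 = 75000 millidegrees (75 C), while any
 * sample with the 0x200 bit set saturates the reading at 255 C.
 */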
#define TAHITI_IO_MC_REGS_SIZE 36

static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
/* ucode loading */
static int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_TAHITI:
		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_PITCAIRN:
		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_VERDE:
	default:
		io_mc_regs = (u32 *)&verde_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
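/*
 * Sequence note: the loader above follows the usual radeon MC pattern -
 * put the sequencer into reset and make it writable (MC_SEQ_SUP_CNTL
 * 0x8 then 0x10), program the per-chip IO debug register pairs, stream
 * the big-endian ucode words into MC_SEQ_SUP_PGM, step the engine back
 * to the active state (0x8, 0x4, 0x1), then poll
 * MC_SEQ_TRAIN_WAKEUP_CNTL until both channels report TRAIN_DONE or the
 * usec timeout expires.
 */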
static int si_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		rlc_chip_name = "TAHITI";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		rlc_chip_name = "PITCAIRN";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		rlc_chip_name = "VERDE";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mc_fw->size != mc_req_size) {
		printk(KERN_ERR
		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mc_fw->size, fw_name);
		err = -EINVAL;
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "si_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}
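/*
 * Note: like the older radeon ucode loaders, this registers a throwaway
 * "radeon_cp" platform device purely so request_firmware() has a
 * struct device to resolve the lookup against, and every image is
 * strictly size-checked against its SI_*_UCODE_SIZE word count (times
 * 4 bytes) before being accepted.
 */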
/* watermark setup */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode)
			tmp = 0; /* 1/2 */
		else
			tmp = 2; /* whole */
	} else
		tmp = 0;

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
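/*
 * Illustrative results: with both crtcs of a pair active the caller
 * gets half a line buffer (4096 * 2), with the partner crtc idle it
 * gets the whole buffer (8192 * 2), and a disabled crtc gets 0.
 */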
static u32 dce6_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}
struct dce6_wm_params {
	u32 dram_channels;   /* number of dram channels */
	u32 yclk;            /* bandwidth per dram data pin in kHz */
	u32 sclk;            /* engine clock in kHz */
	u32 disp_clk;        /* display clock in kHz */
	u32 src_width;       /* viewport width */
	u32 active_time;     /* active display time in ns */
	u32 blank_time;      /* blank time in ns */
	bool interlaced;     /* mode is interlaced */
	fixed20_12 vsc;      /* vertical scale ratio */
	u32 num_heads;       /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;         /* line buffer allocated to pipe */
	u32 vtaps;           /* vertical scaler taps */
};
static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}
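/*
 * Worked example (illustrative numbers): yclk = 500000 kHz and 2 dram
 * channels give (500000 / 1000) * (2 * 4) * 0.7 = 2800 in the fixed
 * point math above, i.e. roughly 2.8 GB/s of raw DRAM bandwidth.
 */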
static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}
static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, sclk, bandwidth;
	fixed20_12 a, b1, b2;
	u32 min_bandwidth;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
	b1.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
	b2.full = dfixed_mul(a, sclk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));

	a.full = dfixed_const(min_bandwidth);
	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}
static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the Available bandwidth.  Display can use this temporarily but not in average. */
	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}
static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
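/*
 * Reading of the math above: the watermark is the worst-case request
 * latency in ns - the fixed 2000 ns MC latency, plus the time the other
 * heads can occupy the return path, plus the dc pipe latency - and if
 * the line buffer cannot be refilled within the active display time,
 * the shortfall is added on top.
 */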
static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
{
	if (dce6_average_bandwidth(wm) <=
	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
{
	if (dce6_average_bandwidth(wm) <=
	    (dce6_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}
static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce6_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}
static void dce6_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		wm.dram_channels = dce6_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
}
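/*
 * Formula note: each priority mark computed above is
 * latency_watermark (ns) * pixel clock (MHz) * hsc / 1000, divided by
 * 16 because the hardware field counts units of 16 pixels; wm A is
 * programmed for the high clock state while the low-clock setup for
 * wm B is still a placeholder above.
 */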
void dce6_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode0 = NULL;
	struct drm_display_mode *mode1 = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i += 2) {
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
	}
}
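/*
 * Note the i += 2 stride: crtcs are walked in the linked pairs that
 * share a line buffer, and each member is passed its partner's mode so
 * dce6_line_buffer_adjust() can decide how to split the buffer.
 */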
/*
 * Core functions
 */
static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
					   u32 num_tile_pipes,
					   u32 num_backends_per_asic,
					   u32 *backend_disable_mask_per_asic,
					   u32 num_shader_engines)
{
	u32 backend_map = 0;
	u32 enabled_backends_mask = 0;
	u32 enabled_backends_count = 0;
	u32 num_backends_per_se;
	u32 cur_pipe;
	u32 swizzle_pipe[SI_MAX_PIPES];
	u32 cur_backend = 0;
	u32 i;
	bool force_no_swizzle;

	/* force legal values */
	if (num_tile_pipes < 1)
		num_tile_pipes = 1;
	if (num_tile_pipes > rdev->config.si.max_tile_pipes)
		num_tile_pipes = rdev->config.si.max_tile_pipes;
	if (num_shader_engines < 1)
		num_shader_engines = 1;
	if (num_shader_engines > rdev->config.si.max_shader_engines)
		num_shader_engines = rdev->config.si.max_shader_engines;
	if (num_backends_per_asic < num_shader_engines)
		num_backends_per_asic = num_shader_engines;
	if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines))
		num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines;

	/* make sure we have the same number of backends per se */
	num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
	/* set up the number of backends per se */
	num_backends_per_se = num_backends_per_asic / num_shader_engines;
	if (num_backends_per_se > rdev->config.si.max_backends_per_se) {
		num_backends_per_se = rdev->config.si.max_backends_per_se;
		num_backends_per_asic = num_backends_per_se * num_shader_engines;
	}

	/* create enable mask and count for enabled backends */
	for (i = 0; i < SI_MAX_BACKENDS; ++i) {
		if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
			enabled_backends_mask |= (1 << i);
			++enabled_backends_count;
		}
		if (enabled_backends_count == num_backends_per_asic)
			break;
	}

	/* force the backends mask to match the current number of backends */
	if (enabled_backends_count != num_backends_per_asic) {
		u32 this_backend_enabled;
		u32 shader_engine;
		u32 backend_per_se;

		enabled_backends_mask = 0;
		enabled_backends_count = 0;
		*backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK;
		for (i = 0; i < SI_MAX_BACKENDS; ++i) {
			/* calc the current se */
			shader_engine = i / rdev->config.si.max_backends_per_se;
			/* calc the backend per se */
			backend_per_se = i % rdev->config.si.max_backends_per_se;
			/* default to not enabled */
			this_backend_enabled = 0;
			if ((shader_engine < num_shader_engines) &&
			    (backend_per_se < num_backends_per_se))
				this_backend_enabled = 1;
			if (this_backend_enabled) {
				enabled_backends_mask |= (1 << i);
				*backend_disable_mask_per_asic &= ~(1 << i);
				++enabled_backends_count;
			}
		}
	}

	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
		force_no_swizzle = true;
		break;
	default:
		force_no_swizzle = false;
		break;
	}
	if (force_no_swizzle) {
		bool last_backend_enabled = false;

		force_no_swizzle = false;
		for (i = 0; i < SI_MAX_BACKENDS; ++i) {
			if (((enabled_backends_mask >> i) & 1) == 1) {
				if (last_backend_enabled)
					force_no_swizzle = true;
				last_backend_enabled = true;
			} else
				last_backend_enabled = false;
		}
	}

	switch (num_tile_pipes) {
	case 1:
	case 3:
	case 5:
	case 7:
		DRM_ERROR("odd number of pipes!\n");
		break;
	case 2:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 1;
		break;
	case 4:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 1;
			swizzle_pipe[3] = 3;
		}
		break;
	case 6:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 1;
			swizzle_pipe[4] = 3;
			swizzle_pipe[5] = 5;
		}
		break;
	case 8:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
			swizzle_pipe[6] = 6;
			swizzle_pipe[7] = 7;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 6;
			swizzle_pipe[4] = 1;
			swizzle_pipe[5] = 3;
			swizzle_pipe[6] = 5;
			swizzle_pipe[7] = 7;
		}
		break;
	}

	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
		while (((1 << cur_backend) & enabled_backends_mask) == 0)
			cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;

		backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));

		cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
	}

	return backend_map;
}
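/*
 * Illustrative walk-through (hypothetical config): with 4 tile pipes,
 * two enabled backends (mask 0x3) and swizzling in effect, swizzle_pipe
 * becomes {0, 2, 1, 3} and the final loop assigns backend 0 to pipe 0,
 * backend 1 to pipe 2, backend 0 to pipe 1 and backend 1 to pipe 3,
 * i.e. backend_map = 0x1100 - one 4-bit backend index per tile pipe.
 */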
static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev,
					u32 disable_mask_per_se,
					u32 max_disable_mask_per_se,
					u32 num_shader_engines)
{
	u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
	u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;

	if (num_shader_engines == 1)
		return disable_mask_per_asic;
	else if (num_shader_engines == 2)
		return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
	else
		return 0xffffffff;
}
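/*
 * Example: a per-SE disable mask of 0x3 (two bits set) on a 2-engine
 * part is replicated at a 2-bit stride, giving an asic-wide mask of
 * 0x3 | (0x3 << 2) = 0xf.
 */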
static void si_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;

	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	if ((rdev->family == CHIP_TAHITI) ||
	    (rdev->family == CHIP_PITCAIRN)) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:  /* non-AA compressed depth or any compressed stencil */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 1:  /* 2xAA/4xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 2:  /* 8xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 8:  /* 1D and 1D Array Surfaces */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 9:  /* Displayable maps. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 10:  /* Display 8bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 11:  /* Display 16bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 12:  /* Display 32bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 13:  /* Thin. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 14:  /* Thin 8 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 15:  /* Thin 16 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 16:  /* Thin 32 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 17:  /* Thin 64 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 21:  /* 8 bpp PRT. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 22:  /* 16 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 23:  /* 32 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 24:  /* 64 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 25:  /* 128 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
						 NUM_BANKS(ADDR_SURF_8_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (rdev->family == CHIP_VERDE) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:  /* non-AA compressed depth or any compressed stencil */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 1:  /* 2xAA/4xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 2:  /* 8xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 8:  /* 1D and 1D Array Surfaces */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 9:  /* Displayable maps. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 10:  /* Display 8bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 11:  /* Display 16bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 12:  /* Display 32bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 13:  /* Thin. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 14:  /* Thin 8 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 15:  /* Thin 16 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 16:  /* Thin 32 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 17:  /* Thin 64 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 21:  /* 8 bpp PRT. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 22:  /* 16 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 23:  /* 32 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 24:  /* 64 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 25:  /* 128 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
						 NUM_BANKS(ADDR_SURF_8_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
}
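/*
 * Each GB_TILE_MODE0..31 entry written above appears to bake one
 * complete addressing recipe - array mode, micro tile mode, pipe
 * config, tile split, bank count and bank/macro-tile geometry - into an
 * indexed table that later surface programming selects from.
 */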
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 cc_rb_backend_disable = 0;
	u32 cc_gc_shader_array_config;
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 gb_backend_map;
	u32 cgts_tcc_disable;
	u32 sx_debug_1;
	u32 gc_user_shader_array_config;
	u32 gc_user_rb_backend_disable;
	u32 cgts_user_tcc_disable;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_pipes_per_simd = 4;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_simds_per_se = 8;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_pipes_per_simd = 4;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_simds_per_se = 5;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_pipes_per_simd = 4;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_simds_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
	cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	cgts_tcc_disable = 0xffff0000;
	for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++)
		cgts_tcc_disable &= ~(1 << (16 + i));
	gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
	gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);

	rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines;
	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp);
	tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.si.backend_disable_mask_per_asic =
		si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK,
					     rdev->config.si.num_shader_engines);
	rdev->config.si.backend_map =
		si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
						rdev->config.si.num_backends_per_se *
						rdev->config.si.num_shader_engines,
						&rdev->config.si.backend_disable_mask_per_asic,
						rdev->config.si.num_shader_engines);
	tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
	rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp);
	rdev->config.si.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	gb_addr_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		gb_addr_config |= NUM_PIPES(0);
		break;
	case 2:
		gb_addr_config |= NUM_PIPES(1);
		break;
	case 4:
		gb_addr_config |= NUM_PIPES(2);
		break;
	case 8:
	default:
		gb_addr_config |= NUM_PIPES(3);
		break;
	}
1698
1699 tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1;
1700 gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
1701 gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1);
1702 tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1;
1703 gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
1704 switch (rdev->config.si.num_gpus) {
1705 case 1:
1706 default:
1707 gb_addr_config |= NUM_GPUS(0);
1708 break;
1709 case 2:
1710 gb_addr_config |= NUM_GPUS(1);
1711 break;
1712 case 4:
1713 gb_addr_config |= NUM_GPUS(2);
1714 break;
1715 }
1716 switch (rdev->config.si.multi_gpu_tile_size) {
1717 case 16:
1718 gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
1719 break;
1720 case 32:
1721 default:
1722 gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
1723 break;
1724 case 64:
1725 gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
1726 break;
1727 case 128:
1728 gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
1729 break;
1730 }
1731 switch (rdev->config.si.mem_row_size_in_kb) {
1732 case 1:
1733 default:
1734 gb_addr_config |= ROW_SIZE(0);
1735 break;
1736 case 2:
1737 gb_addr_config |= ROW_SIZE(1);
1738 break;
1739 case 4:
1740 gb_addr_config |= ROW_SIZE(2);
1741 break;
1742 }
1743
1744 tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
1745 rdev->config.si.num_tile_pipes = (1 << tmp);
1746 tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
1747 rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256;
1748 tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
1749 rdev->config.si.num_shader_engines = tmp + 1;
1750 tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
1751 rdev->config.si.num_gpus = tmp + 1;
1752 tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
1753 rdev->config.si.multi_gpu_tile_size = 1 << tmp;
1754 tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
1755 rdev->config.si.mem_row_size_in_kb = 1 << tmp;
1756
1757 gb_backend_map =
1758 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1759 rdev->config.si.num_backends_per_se *
1760 rdev->config.si.num_shader_engines,
1761 &rdev->config.si.backend_disable_mask_per_asic,
1762 rdev->config.si.num_shader_engines);
1763
1764 /* setup tiling info dword. gb_addr_config is not adequate since it does
1765 * not have bank info, so create a custom tiling dword.
1766 * bits 3:0 num_pipes
1767 * bits 7:4 num_banks
1768 * bits 11:8 group_size
1769 * bits 15:12 row_size
1770 */
1771 rdev->config.si.tile_config = 0;
1772 switch (rdev->config.si.num_tile_pipes) {
1773 case 1:
1774 rdev->config.si.tile_config |= (0 << 0);
1775 break;
1776 case 2:
1777 rdev->config.si.tile_config |= (1 << 0);
1778 break;
1779 case 4:
1780 rdev->config.si.tile_config |= (2 << 0);
1781 break;
1782 case 8:
1783 default:
1784 /* XXX what about 12? */
1785 rdev->config.si.tile_config |= (3 << 0);
1786 break;
1787 }
1788 rdev->config.si.tile_config |=
1789 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
1790 rdev->config.si.tile_config |=
1791 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1792 rdev->config.si.tile_config |=
1793 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1794
1795 rdev->config.si.backend_map = gb_backend_map;
1796 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1797 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1798 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1799
1800 /* primary versions */
1801 WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1802 WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1803 WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1804
1805 WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1806
1807 /* user versions */
1808 WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1809 WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1810 WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1811
1812 WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
1813
1814 si_tiling_mode_table_init(rdev);
1815
1816 /* set HW defaults for 3D engine */
1817 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1818 ROQ_IB2_START(0x2b)));
1819 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1820
1821 sx_debug_1 = RREG32(SX_DEBUG_1);
1822 WREG32(SX_DEBUG_1, sx_debug_1);
1823
1824 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1825
1826 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1827 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1828 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1829 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1830
1831 WREG32(VGT_NUM_INSTANCES, 1);
1832
1833 WREG32(CP_PERFMON_CNTL, 0);
1834
1835 WREG32(SQ_CONFIG, 0);
1836
1837 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1838 FORCE_EOV_MAX_REZ_CNT(255)));
1839
1840 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1841 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1842
1843 WREG32(VGT_GS_VERTEX_REUSE, 16);
1844 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1845
1846 WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1847 WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1848 WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1849 WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1850 WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1851 WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1852 WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1853 WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1854
1855 tmp = RREG32(HDP_MISC_CNTL);
1856 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1857 WREG32(HDP_MISC_CNTL, tmp);
1858
1859 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1860 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1861
1862 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1863
1864 udelay(50);
1865}
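/* Illustrative sketch, not part of the driver: decoding the custom tiling
 * dword assembled above. The field layout follows the comment in
 * si_gpu_init(); the value->unit mappings mirror the encodings used there
 * (pipes: 1 << field, per the NUM_PIPES read-back; group size:
 * (field + 1) * 256 bytes, per the PIPE_INTERLEAVE_SIZE read-back;
 * row size: 1KB << field). Bits 7:4 carry the raw NOOFBANK field.
 * The helper names below are hypothetical, not kernel API.
 */
static inline unsigned si_tile_config_num_pipes(u32 tile_config)
{
    return 1 << (tile_config & 0xf);               /* bits 3:0 */
}

static inline unsigned si_tile_config_group_size(u32 tile_config)
{
    return (((tile_config >> 8) & 0xf) + 1) * 256; /* bits 11:8, bytes */
}

static inline unsigned si_tile_config_row_size(u32 tile_config)
{
    return 1024 << ((tile_config >> 12) & 0xf);    /* bits 15:12, bytes */
}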
1866
1867/*
1868 * GPU scratch registers helpers function.
1869 */
1870static void si_scratch_init(struct radeon_device *rdev)
1871{
1872 int i;
1873
1874 rdev->scratch.num_reg = 7;
1875 rdev->scratch.reg_base = SCRATCH_REG0;
1876 for (i = 0; i < rdev->scratch.num_reg; i++) {
1877 rdev->scratch.free[i] = true;
1878 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1879 }
1880}
1881
1882void si_fence_ring_emit(struct radeon_device *rdev,
1883 struct radeon_fence *fence)
1884{
1885 struct radeon_ring *ring = &rdev->ring[fence->ring];
1886 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1887
1888 /* flush read cache over gart */
1889 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1890 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1891 radeon_ring_write(ring, 0);
1892 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1893 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1894 PACKET3_TC_ACTION_ENA |
1895 PACKET3_SH_KCACHE_ACTION_ENA |
1896 PACKET3_SH_ICACHE_ACTION_ENA);
1897 radeon_ring_write(ring, 0xFFFFFFFF);
1898 radeon_ring_write(ring, 0);
1899 radeon_ring_write(ring, 10); /* poll interval */
1900 /* EVENT_WRITE_EOP - flush caches, send int */
1901 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1902 radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
1903 radeon_ring_write(ring, addr & 0xffffffff);
1904 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
1905 radeon_ring_write(ring, fence->seq);
1906 radeon_ring_write(ring, 0);
1907}
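/* Illustrative sketch, not driver code: once the EVENT_WRITE_EOP above
 * retires, the CP writes the 32-bit fence->seq to 'addr' (DATA_SEL(1))
 * and raises an interrupt (INT_SEL(2)), so the CPU side only has to
 * compare sequence numbers. Hypothetical helper, ignoring the wrap
 * handling the real fence code performs:
 */
static bool si_fence_seq_signaled(volatile u32 *cpu_addr, u32 seq)
{
    return *cpu_addr >= seq; /* true once the CP wrote the value back */
}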
1908
1909/*
1910 * IB stuff
1911 */
1912void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1913{
1914 struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
1915 u32 header;
1916
1917 if (ib->is_const_ib)
1918 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1919 else
1920 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1921
1922 radeon_ring_write(ring, header);
1923 radeon_ring_write(ring,
1924#ifdef __BIG_ENDIAN
1925 (2 << 0) |
1926#endif
1927 (ib->gpu_addr & 0xFFFFFFFC));
1928 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1929 radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24));
1930
1931 /* flush read cache over gart for this vmid */
1932 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1933 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1934 radeon_ring_write(ring, ib->vm_id);
1935 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1936 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1937 PACKET3_TC_ACTION_ENA |
1938 PACKET3_SH_KCACHE_ACTION_ENA |
1939 PACKET3_SH_ICACHE_ACTION_ENA);
1940 radeon_ring_write(ring, 0xFFFFFFFF);
1941 radeon_ring_write(ring, 0);
1942 radeon_ring_write(ring, 10); /* poll interval */
1943}
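/* Illustrative sketch, not driver code: a hypothetical helper mirroring
 * the four dwords si_ring_ib_execute() emits for the indirect buffer
 * packet (little-endian case, without the cache-flush trailer):
 */
static void si_encode_ib_packet(u32 *out, struct radeon_ib *ib)
{
    out[0] = PACKET3(ib->is_const_ib ? PACKET3_INDIRECT_BUFFER_CONST :
                     PACKET3_INDIRECT_BUFFER, 2);
    out[1] = ib->gpu_addr & 0xFFFFFFFC;             /* addr bits 31:2 */
    out[2] = upper_32_bits(ib->gpu_addr) & 0xFFFF;  /* addr bits 47:32 */
    out[3] = ib->length_dw | (ib->vm_id << 24);     /* size | VM id */
}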
1944
1945/*
1946 * CP.
1947 */
1948static void si_cp_enable(struct radeon_device *rdev, bool enable)
1949{
1950 if (enable)
1951 WREG32(CP_ME_CNTL, 0);
1952 else {
1953 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1954 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1955 WREG32(SCRATCH_UMSK, 0);
1956 }
1957 udelay(50);
1958}
1959
1960static int si_cp_load_microcode(struct radeon_device *rdev)
1961{
1962 const __be32 *fw_data;
1963 int i;
1964
1965 if (!rdev->me_fw || !rdev->pfp_fw)
1966 return -EINVAL;
1967
1968 si_cp_enable(rdev, false);
1969
1970 /* PFP */
1971 fw_data = (const __be32 *)rdev->pfp_fw->data;
1972 WREG32(CP_PFP_UCODE_ADDR, 0);
1973 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
1974 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1975 WREG32(CP_PFP_UCODE_ADDR, 0);
1976
1977 /* CE */
1978 fw_data = (const __be32 *)rdev->ce_fw->data;
1979 WREG32(CP_CE_UCODE_ADDR, 0);
1980 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
1981 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1982 WREG32(CP_CE_UCODE_ADDR, 0);
1983
1984 /* ME */
1985 fw_data = (const __be32 *)rdev->me_fw->data;
1986 WREG32(CP_ME_RAM_WADDR, 0);
1987 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
1988 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1989 WREG32(CP_ME_RAM_WADDR, 0);
1990
1991 WREG32(CP_PFP_UCODE_ADDR, 0);
1992 WREG32(CP_CE_UCODE_ADDR, 0);
1993 WREG32(CP_ME_RAM_WADDR, 0);
1994 WREG32(CP_ME_RAM_RADDR, 0);
1995 return 0;
1996}
1997
1998static int si_cp_start(struct radeon_device *rdev)
1999{
2000 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2001 int r, i;
2002
2003 r = radeon_ring_lock(rdev, ring, 7 + 4);
2004 if (r) {
2005 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2006 return r;
2007 }
2008 /* init the CP */
2009 radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
2010 radeon_ring_write(ring, 0x1);
2011 radeon_ring_write(ring, 0x0);
2012 radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
2013 radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
2014 radeon_ring_write(ring, 0);
2015 radeon_ring_write(ring, 0);
2016
2017 /* init the CE partitions */
2018 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2019 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2020 radeon_ring_write(ring, 0xc000);
2021 radeon_ring_write(ring, 0xe000);
2022 radeon_ring_unlock_commit(rdev, ring);
2023
2024 si_cp_enable(rdev, true);
2025
2026 r = radeon_ring_lock(rdev, ring, si_default_size + 10);
2027 if (r) {
2028 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2029 return r;
2030 }
2031
2032 /* setup clear context state */
2033 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2034 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2035
2036 for (i = 0; i < si_default_size; i++)
2037 radeon_ring_write(ring, si_default_state[i]);
2038
2039 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2040 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2041
2042 /* set clear context state */
2043 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2044 radeon_ring_write(ring, 0);
2045
2046 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2047 radeon_ring_write(ring, 0x00000316);
2048 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2049 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2050
2051 radeon_ring_unlock_commit(rdev, ring);
2052
2053 for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
2054 ring = &rdev->ring[i];
2055 r = radeon_ring_lock(rdev, ring, 2);
if (r) {
DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
return r;
}
2056
2057 /* clear the compute context state */
2058 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
2059 radeon_ring_write(ring, 0);
2060
2061 radeon_ring_unlock_commit(rdev, ring);
2062 }
2063
2064 return 0;
2065}
2066
2067static void si_cp_fini(struct radeon_device *rdev)
2068{
2069 si_cp_enable(rdev, false);
2070 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2071 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2072 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
2073}
2074
2075static int si_cp_resume(struct radeon_device *rdev)
2076{
2077 struct radeon_ring *ring;
2078 u32 tmp;
2079 u32 rb_bufsz;
2080 int r;
2081
2082 /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
2083 WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
2084 SOFT_RESET_PA |
2085 SOFT_RESET_VGT |
2086 SOFT_RESET_SPI |
2087 SOFT_RESET_SX));
2088 RREG32(GRBM_SOFT_RESET);
2089 mdelay(15);
2090 WREG32(GRBM_SOFT_RESET, 0);
2091 RREG32(GRBM_SOFT_RESET);
2092
2093 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2094 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2095
2096 /* Set the write pointer delay */
2097 WREG32(CP_RB_WPTR_DELAY, 0);
2098
2099 WREG32(CP_DEBUG, 0);
2100 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2101
2102 /* ring 0 - compute and gfx */
2103 /* Set ring buffer size */
2104 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2105 rb_bufsz = drm_order(ring->ring_size / 8);
2106 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2107#ifdef __BIG_ENDIAN
2108 tmp |= BUF_SWAP_32BIT;
2109#endif
2110 WREG32(CP_RB0_CNTL, tmp);
2111
2112 /* Initialize the ring buffer's read and write pointers */
2113 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2114 ring->wptr = 0;
2115 WREG32(CP_RB0_WPTR, ring->wptr);
2116
2117 /* set the wb address whether it's enabled or not */
2118 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2119 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2120
2121 if (rdev->wb.enabled)
2122 WREG32(SCRATCH_UMSK, 0xff);
2123 else {
2124 tmp |= RB_NO_UPDATE;
2125 WREG32(SCRATCH_UMSK, 0);
2126 }
2127
2128 mdelay(1);
2129 WREG32(CP_RB0_CNTL, tmp);
2130
2131 WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
2132
2133 ring->rptr = RREG32(CP_RB0_RPTR);
2134
2135 /* ring1 - compute only */
2136 /* Set ring buffer size */
2137 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2138 rb_bufsz = drm_order(ring->ring_size / 8);
2139 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2140#ifdef __BIG_ENDIAN
2141 tmp |= BUF_SWAP_32BIT;
2142#endif
2143 WREG32(CP_RB1_CNTL, tmp);
2144
2145 /* Initialize the ring buffer's read and write pointers */
2146 WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
2147 ring->wptr = 0;
2148 WREG32(CP_RB1_WPTR, ring->wptr);
2149
2150 /* set the wb address whether it's enabled or not */
2151 WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2152 WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
2153
2154 mdelay(1);
2155 WREG32(CP_RB1_CNTL, tmp);
2156
2157 WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2158
2159 ring->rptr = RREG32(CP_RB1_RPTR);
2160
2161 /* ring2 - compute only */
2162 /* Set ring buffer size */
2163 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2164 rb_bufsz = drm_order(ring->ring_size / 8);
2165 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2166#ifdef __BIG_ENDIAN
2167 tmp |= BUF_SWAP_32BIT;
2168#endif
2169 WREG32(CP_RB2_CNTL, tmp);
2170
2171 /* Initialize the ring buffer's read and write pointers */
2172 WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
2173 ring->wptr = 0;
2174 WREG32(CP_RB2_WPTR, ring->wptr);
2175
2176 /* set the wb address whether it's enabled or not */
2177 WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2178 WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
2179
2180 mdelay(1);
2181 WREG32(CP_RB2_CNTL, tmp);
2182
2183 WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2184
2185 ring->rptr = RREG32(CP_RB2_RPTR);
2186
2187 /* start the rings */
2188 si_cp_start(rdev);
2189 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2190 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2191 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2192 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2193 if (r) {
2194 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2195 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2196 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2197 return r;
2198 }
2199 r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2200 if (r) {
2201 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2202 }
2203 r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
2204 if (r) {
2205 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2206 }
2207
2208 return 0;
2209}
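/* Worked example, illustrative only: for a 1M GFX ring,
 * rb_bufsz = drm_order(1048576 / 8) = 17 and the RPTR writeback block
 * size field is drm_order(RADEON_GPU_PAGE_SIZE / 8) = drm_order(512) = 9,
 * so the CP_RB0_CNTL value programmed above is (9 << 8) | 17
 * (plus BUF_SWAP_32BIT on big-endian).
 */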
2210
2211bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2212{
2213 u32 srbm_status;
2214 u32 grbm_status, grbm_status2;
2215 u32 grbm_status_se0, grbm_status_se1;
2216 struct r100_gpu_lockup *lockup = &rdev->config.si.lockup;
2217 int r;
2218
2219 srbm_status = RREG32(SRBM_STATUS);
2220 grbm_status = RREG32(GRBM_STATUS);
2221 grbm_status2 = RREG32(GRBM_STATUS2);
2222 grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2223 grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2224 if (!(grbm_status & GUI_ACTIVE)) {
2225 r100_gpu_lockup_update(lockup, ring);
2226 return false;
2227 }
2228 /* force CP activities */
2229 r = radeon_ring_lock(rdev, ring, 2);
2230 if (!r) {
2231 /* PACKET2 NOP */
2232 radeon_ring_write(ring, 0x80000000);
2233 radeon_ring_write(ring, 0x80000000);
2234 radeon_ring_unlock_commit(rdev, ring);
2235 }
2236 /* XXX deal with CP0,1,2 */
2237 ring->rptr = RREG32(ring->rptr_reg);
2238 return r100_gpu_cp_is_lockup(rdev, lockup, ring);
2239}
2240
2241static int si_gpu_soft_reset(struct radeon_device *rdev)
2242{
2243 struct evergreen_mc_save save;
2244 u32 grbm_reset = 0;
2245
2246 if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2247 return 0;
2248
2249 dev_info(rdev->dev, "GPU softreset\n");
2250 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2251 RREG32(GRBM_STATUS));
2252 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2253 RREG32(GRBM_STATUS2));
2254 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2255 RREG32(GRBM_STATUS_SE0));
2256 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2257 RREG32(GRBM_STATUS_SE1));
2258 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2259 RREG32(SRBM_STATUS));
2260 evergreen_mc_stop(rdev, &save);
2261 if (radeon_mc_wait_for_idle(rdev)) {
2262 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2263 }
2264 /* Disable CP parsing/prefetching */
2265 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2266
2267 /* reset all the gfx blocks */
2268 grbm_reset = (SOFT_RESET_CP |
2269 SOFT_RESET_CB |
2270 SOFT_RESET_DB |
2271 SOFT_RESET_GDS |
2272 SOFT_RESET_PA |
2273 SOFT_RESET_SC |
2274 SOFT_RESET_SPI |
2275 SOFT_RESET_SX |
2276 SOFT_RESET_TC |
2277 SOFT_RESET_TA |
2278 SOFT_RESET_VGT |
2279 SOFT_RESET_IA);
2280
2281 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2282 WREG32(GRBM_SOFT_RESET, grbm_reset);
2283 (void)RREG32(GRBM_SOFT_RESET);
2284 udelay(50);
2285 WREG32(GRBM_SOFT_RESET, 0);
2286 (void)RREG32(GRBM_SOFT_RESET);
2287 /* Wait a little for things to settle down */
2288 udelay(50);
2289 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2290 RREG32(GRBM_STATUS));
2291 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2292 RREG32(GRBM_STATUS2));
2293 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2294 RREG32(GRBM_STATUS_SE0));
2295 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2296 RREG32(GRBM_STATUS_SE1));
2297 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2298 RREG32(SRBM_STATUS));
2299 evergreen_mc_resume(rdev, &save);
2300 return 0;
2301}
2302
2303int si_asic_reset(struct radeon_device *rdev)
2304{
2305 return si_gpu_soft_reset(rdev);
2306}
2307
2308/* MC */
2309static void si_mc_program(struct radeon_device *rdev)
2310{
2311 struct evergreen_mc_save save;
2312 u32 tmp;
2313 int i, j;
2314
2315 /* Initialize HDP */
2316 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2317 WREG32((0x2c14 + j), 0x00000000);
2318 WREG32((0x2c18 + j), 0x00000000);
2319 WREG32((0x2c1c + j), 0x00000000);
2320 WREG32((0x2c20 + j), 0x00000000);
2321 WREG32((0x2c24 + j), 0x00000000);
2322 }
2323 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2324
2325 evergreen_mc_stop(rdev, &save);
2326 if (radeon_mc_wait_for_idle(rdev)) {
2327 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2328 }
2329 /* Lock out access through VGA aperture */
2330 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2331 /* Update configuration */
2332 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2333 rdev->mc.vram_start >> 12);
2334 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2335 rdev->mc.vram_end >> 12);
2336 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2337 rdev->vram_scratch.gpu_addr >> 12);
2338 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2339 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2340 WREG32(MC_VM_FB_LOCATION, tmp);
2341 /* XXX double check these! */
2342 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2343 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2344 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
2345 WREG32(MC_VM_AGP_BASE, 0);
2346 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2347 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2348 if (radeon_mc_wait_for_idle(rdev)) {
2349 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2350 }
2351 evergreen_mc_resume(rdev, &save);
2352 /* we need to own VRAM, so turn off the VGA renderer here
2353 * to stop it overwriting our objects */
2354 rv515_vga_render_disable(rdev);
2355}
2356
2357/* SI MC address space is 40 bits */
2358static void si_vram_location(struct radeon_device *rdev,
2359 struct radeon_mc *mc, u64 base)
2360{
2361 mc->vram_start = base;
2362 if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
2363 dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
2364 mc->real_vram_size = mc->aper_size;
2365 mc->mc_vram_size = mc->aper_size;
2366 }
2367 mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
2368 dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
2369 mc->mc_vram_size >> 20, mc->vram_start,
2370 mc->vram_end, mc->real_vram_size >> 20);
2371}
2372
2373static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
2374{
2375 u64 size_af, size_bf;
2376
2377 size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
2378 size_bf = mc->vram_start & ~mc->gtt_base_align;
2379 if (size_bf > size_af) {
2380 if (mc->gtt_size > size_bf) {
2381 dev_warn(rdev->dev, "limiting GTT\n");
2382 mc->gtt_size = size_bf;
2383 }
2384 mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
2385 } else {
2386 if (mc->gtt_size > size_af) {
2387 dev_warn(rdev->dev, "limiting GTT\n");
2388 mc->gtt_size = size_af;
2389 }
2390 mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
2391 }
2392 mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
2393 dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
2394 mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
2395}
2396
2397static void si_vram_gtt_location(struct radeon_device *rdev,
2398 struct radeon_mc *mc)
2399{
2400 if (mc->mc_vram_size > 0xFFC0000000ULL) {
2401 /* leave room for at least 1024M GTT */
2402 dev_warn(rdev->dev, "limiting VRAM\n");
2403 mc->real_vram_size = 0xFFC0000000ULL;
2404 mc->mc_vram_size = 0xFFC0000000ULL;
2405 }
2406 si_vram_location(rdev, &rdev->mc, 0);
2407 rdev->mc.gtt_base_align = 0;
2408 si_gtt_location(rdev, mc);
2409}
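/* Worked example, illustrative only: with 2048M of VRAM and a 512M GTT
 * (gtt_base_align = 0), si_vram_location() places VRAM at
 * 0x0000000000 - 0x007FFFFFFF. In si_gtt_location() size_bf is then 0
 * and size_af nearly the whole 40-bit space, so the GTT lands directly
 * after VRAM at 0x0080000000 - 0x009FFFFFFF.
 */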
2410
2411static int si_mc_init(struct radeon_device *rdev)
2412{
2413 u32 tmp;
2414 int chansize, numchan;
2415
2416 /* Get VRAM information */
2417 rdev->mc.vram_is_ddr = true;
2418 tmp = RREG32(MC_ARB_RAMCFG);
2419 if (tmp & CHANSIZE_OVERRIDE) {
2420 chansize = 16;
2421 } else if (tmp & CHANSIZE_MASK) {
2422 chansize = 64;
2423 } else {
2424 chansize = 32;
2425 }
2426 tmp = RREG32(MC_SHARED_CHMAP);
2427 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2428 case 0:
2429 default:
2430 numchan = 1;
2431 break;
2432 case 1:
2433 numchan = 2;
2434 break;
2435 case 2:
2436 numchan = 4;
2437 break;
2438 case 3:
2439 numchan = 8;
2440 break;
2441 case 4:
2442 numchan = 3;
2443 break;
2444 case 5:
2445 numchan = 6;
2446 break;
2447 case 6:
2448 numchan = 10;
2449 break;
2450 case 7:
2451 numchan = 12;
2452 break;
2453 case 8:
2454 numchan = 16;
2455 break;
2456 }
2457 rdev->mc.vram_width = numchan * chansize;
2458 /* Could the aperture size report 0? */
2459 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2460 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2461 /* size in MB on SI */
2462 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2463 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2464 rdev->mc.visible_vram_size = rdev->mc.aper_size;
2465 si_vram_gtt_location(rdev, &rdev->mc);
2466 radeon_update_bandwidth_info(rdev);
2467
2468 return 0;
2469}
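/* Illustrative example: NOOFCHAN and the per-channel size combine into
 * the bus width computed above, e.g. a Tahiti board reporting 12
 * channels of 32 bits each yields rdev->mc.vram_width = 384.
 */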
2470
2471/*
2472 * GART
2473 */
2474void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
2475{
2476 /* flush hdp cache */
2477 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2478
2479 /* bits 0-15 are the VM contexts 0-15 */
2480 WREG32(VM_INVALIDATE_REQUEST, 1);
2481}
2482
2483int si_pcie_gart_enable(struct radeon_device *rdev)
2484{
2485 int r, i;
2486
2487 if (rdev->gart.robj == NULL) {
2488 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2489 return -EINVAL;
2490 }
2491 r = radeon_gart_table_vram_pin(rdev);
2492 if (r)
2493 return r;
2494 radeon_gart_restore(rdev);
2495 /* Setup TLB control */
2496 WREG32(MC_VM_MX_L1_TLB_CNTL,
2497 (0xA << 7) |
2498 ENABLE_L1_TLB |
2499 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2500 ENABLE_ADVANCED_DRIVER_MODEL |
2501 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2502 /* Setup L2 cache */
2503 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
2504 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2505 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2506 EFFECTIVE_L2_QUEUE_SIZE(7) |
2507 CONTEXT1_IDENTITY_ACCESS_MODE(1));
2508 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
2509 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2510 L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2511 /* setup context0 */
2512 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2513 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2514 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2515 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
2516 (u32)(rdev->dummy_page.addr >> 12));
2517 WREG32(VM_CONTEXT0_CNTL2, 0);
2518 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2519 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
2520
2521 WREG32(0x15D4, 0);
2522 WREG32(0x15D8, 0);
2523 WREG32(0x15DC, 0);
2524
2525 /* empty context1-15 */
2526 /* FIXME: start with 1G; once a two-level page table is in use,
2527 * switch to the full VM address space
2528 */
2529 /* set vm size, must be a multiple of 4 */
2530 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
2531 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
2532 for (i = 1; i < 16; i++) {
2533 if (i < 8)
2534 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
2535 rdev->gart.table_addr >> 12);
2536 else
2537 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2538 rdev->gart.table_addr >> 12);
2539 }
2540
2541 /* enable context1-15 */
2542 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2543 (u32)(rdev->dummy_page.addr >> 12));
2544 WREG32(VM_CONTEXT1_CNTL2, 0);
2545 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2546 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
2547
2548 si_pcie_gart_tlb_flush(rdev);
2549 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2550 (unsigned)(rdev->mc.gtt_size >> 20),
2551 (unsigned long long)rdev->gart.table_addr);
2552 rdev->gart.ready = true;
2553 return 0;
2554}
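/* Worked example, illustrative only: VM_CONTEXT1_PAGE_TABLE_END_ADDR is
 * programmed to (1 << 30) / RADEON_GPU_PAGE_SIZE = 262144 pages, i.e.
 * the 1G the FIXME above mentions. With PAGE_TABLE_DEPTH(0) (a flat
 * table) and the usual 8-byte GPU PTEs, each context's page table spans
 * 262144 * 8 = 2M of VRAM.
 */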
2555
2556void si_pcie_gart_disable(struct radeon_device *rdev)
2557{
2558 /* Disable all tables */
2559 WREG32(VM_CONTEXT0_CNTL, 0);
2560 WREG32(VM_CONTEXT1_CNTL, 0);
2561 /* Setup TLB control */
2562 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2563 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2564 /* Setup L2 cache */
2565 WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2566 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2567 EFFECTIVE_L2_QUEUE_SIZE(7) |
2568 CONTEXT1_IDENTITY_ACCESS_MODE(1));
2569 WREG32(VM_L2_CNTL2, 0);
2570 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2571 L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2572 radeon_gart_table_vram_unpin(rdev);
2573}
2574
2575void si_pcie_gart_fini(struct radeon_device *rdev)
2576{
2577 si_pcie_gart_disable(rdev);
2578 radeon_gart_table_vram_free(rdev);
2579 radeon_gart_fini(rdev);
2580}
2581
2582/* vm parser */
2583static bool si_vm_reg_valid(u32 reg)
2584{
2585 /* context regs are fine */
2586 if (reg >= 0x28000)
2587 return true;
2588
2589 /* check config regs */
2590 switch (reg) {
2591 case GRBM_GFX_INDEX:
2592 case VGT_VTX_VECT_EJECT_REG:
2593 case VGT_CACHE_INVALIDATION:
2594 case VGT_ESGS_RING_SIZE:
2595 case VGT_GSVS_RING_SIZE:
2596 case VGT_GS_VERTEX_REUSE:
2597 case VGT_PRIMITIVE_TYPE:
2598 case VGT_INDEX_TYPE:
2599 case VGT_NUM_INDICES:
2600 case VGT_NUM_INSTANCES:
2601 case VGT_TF_RING_SIZE:
2602 case VGT_HS_OFFCHIP_PARAM:
2603 case VGT_TF_MEMORY_BASE:
2604 case PA_CL_ENHANCE:
2605 case PA_SU_LINE_STIPPLE_VALUE:
2606 case PA_SC_LINE_STIPPLE_STATE:
2607 case PA_SC_ENHANCE:
2608 case SQC_CACHES:
2609 case SPI_STATIC_THREAD_MGMT_1:
2610 case SPI_STATIC_THREAD_MGMT_2:
2611 case SPI_STATIC_THREAD_MGMT_3:
2612 case SPI_PS_MAX_WAVE_ID:
2613 case SPI_CONFIG_CNTL:
2614 case SPI_CONFIG_CNTL_1:
2615 case TA_CNTL_AUX:
2616 return true;
2617 default:
2618 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
2619 return false;
2620 }
2621}
2622
2623static int si_vm_packet3_ce_check(struct radeon_device *rdev,
2624 u32 *ib, struct radeon_cs_packet *pkt)
2625{
2626 switch (pkt->opcode) {
2627 case PACKET3_NOP:
2628 case PACKET3_SET_BASE:
2629 case PACKET3_SET_CE_DE_COUNTERS:
2630 case PACKET3_LOAD_CONST_RAM:
2631 case PACKET3_WRITE_CONST_RAM:
2632 case PACKET3_WRITE_CONST_RAM_OFFSET:
2633 case PACKET3_DUMP_CONST_RAM:
2634 case PACKET3_INCREMENT_CE_COUNTER:
2635 case PACKET3_WAIT_ON_DE_COUNTER:
2636 case PACKET3_CE_WRITE:
2637 break;
2638 default:
2639 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
2640 return -EINVAL;
2641 }
2642 return 0;
2643}
2644
2645static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2646 u32 *ib, struct radeon_cs_packet *pkt)
2647{
2648 u32 idx = pkt->idx + 1;
2649 u32 idx_value = ib[idx];
2650 u32 start_reg, end_reg, reg, i;
2651
2652 switch (pkt->opcode) {
2653 case PACKET3_NOP:
2654 case PACKET3_SET_BASE:
2655 case PACKET3_CLEAR_STATE:
2656 case PACKET3_INDEX_BUFFER_SIZE:
2657 case PACKET3_DISPATCH_DIRECT:
2658 case PACKET3_DISPATCH_INDIRECT:
2659 case PACKET3_ALLOC_GDS:
2660 case PACKET3_WRITE_GDS_RAM:
2661 case PACKET3_ATOMIC_GDS:
2662 case PACKET3_ATOMIC:
2663 case PACKET3_OCCLUSION_QUERY:
2664 case PACKET3_SET_PREDICATION:
2665 case PACKET3_COND_EXEC:
2666 case PACKET3_PRED_EXEC:
2667 case PACKET3_DRAW_INDIRECT:
2668 case PACKET3_DRAW_INDEX_INDIRECT:
2669 case PACKET3_INDEX_BASE:
2670 case PACKET3_DRAW_INDEX_2:
2671 case PACKET3_CONTEXT_CONTROL:
2672 case PACKET3_INDEX_TYPE:
2673 case PACKET3_DRAW_INDIRECT_MULTI:
2674 case PACKET3_DRAW_INDEX_AUTO:
2675 case PACKET3_DRAW_INDEX_IMMD:
2676 case PACKET3_NUM_INSTANCES:
2677 case PACKET3_DRAW_INDEX_MULTI_AUTO:
2678 case PACKET3_STRMOUT_BUFFER_UPDATE:
2679 case PACKET3_DRAW_INDEX_OFFSET_2:
2680 case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
2681 case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
2682 case PACKET3_MPEG_INDEX:
2683 case PACKET3_WAIT_REG_MEM:
2684 case PACKET3_MEM_WRITE:
2685 case PACKET3_PFP_SYNC_ME:
2686 case PACKET3_SURFACE_SYNC:
2687 case PACKET3_EVENT_WRITE:
2688 case PACKET3_EVENT_WRITE_EOP:
2689 case PACKET3_EVENT_WRITE_EOS:
2690 case PACKET3_SET_CONTEXT_REG:
2691 case PACKET3_SET_CONTEXT_REG_INDIRECT:
2692 case PACKET3_SET_SH_REG:
2693 case PACKET3_SET_SH_REG_OFFSET:
2694 case PACKET3_INCREMENT_DE_COUNTER:
2695 case PACKET3_WAIT_ON_CE_COUNTER:
2696 case PACKET3_WAIT_ON_AVAIL_BUFFER:
2697 case PACKET3_ME_WRITE:
2698 break;
2699 case PACKET3_COPY_DATA:
2700 if ((idx_value & 0xf00) == 0) {
2701 reg = ib[idx + 3] * 4;
2702 if (!si_vm_reg_valid(reg))
2703 return -EINVAL;
2704 }
2705 break;
2706 case PACKET3_WRITE_DATA:
2707 if ((idx_value & 0xf00) == 0) {
2708 start_reg = ib[idx + 1] * 4;
2709 if (idx_value & 0x10000) {
2710 if (!si_vm_reg_valid(start_reg))
2711 return -EINVAL;
2712 } else {
2713 for (i = 0; i < (pkt->count - 2); i++) {
2714 reg = start_reg + (4 * i);
2715 if (!si_vm_reg_valid(reg))
2716 return -EINVAL;
2717 }
2718 }
2719 }
2720 break;
2721 case PACKET3_COND_WRITE:
2722 if (idx_value & 0x100) {
2723 reg = ib[idx + 5] * 4;
2724 if (!si_vm_reg_valid(reg))
2725 return -EINVAL;
2726 }
2727 break;
2728 case PACKET3_COPY_DW:
2729 if (idx_value & 0x2) {
2730 reg = ib[idx + 3] * 4;
2731 if (!si_vm_reg_valid(reg))
2732 return -EINVAL;
2733 }
2734 break;
2735 case PACKET3_SET_CONFIG_REG:
2736 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2737 end_reg = 4 * pkt->count + start_reg - 4;
2738 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2739 (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2740 (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2741 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2742 return -EINVAL;
2743 }
2744 for (i = 0; i < pkt->count; i++) {
2745 reg = start_reg + (4 * i);
2746 if (!si_vm_reg_valid(reg))
2747 return -EINVAL;
2748 }
2749 break;
2750 default:
2751 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2752 return -EINVAL;
2753 }
2754 return 0;
2755}
2756
2757static int si_vm_packet3_compute_check(struct radeon_device *rdev,
2758 u32 *ib, struct radeon_cs_packet *pkt)
2759{
2760 u32 idx = pkt->idx + 1;
2761 u32 idx_value = ib[idx];
2762 u32 start_reg, reg, i;
2763
2764 switch (pkt->opcode) {
2765 case PACKET3_NOP:
2766 case PACKET3_SET_BASE:
2767 case PACKET3_CLEAR_STATE:
2768 case PACKET3_DISPATCH_DIRECT:
2769 case PACKET3_DISPATCH_INDIRECT:
2770 case PACKET3_ALLOC_GDS:
2771 case PACKET3_WRITE_GDS_RAM:
2772 case PACKET3_ATOMIC_GDS:
2773 case PACKET3_ATOMIC:
2774 case PACKET3_OCCLUSION_QUERY:
2775 case PACKET3_SET_PREDICATION:
2776 case PACKET3_COND_EXEC:
2777 case PACKET3_PRED_EXEC:
2778 case PACKET3_CONTEXT_CONTROL:
2779 case PACKET3_STRMOUT_BUFFER_UPDATE:
2780 case PACKET3_WAIT_REG_MEM:
2781 case PACKET3_MEM_WRITE:
2782 case PACKET3_PFP_SYNC_ME:
2783 case PACKET3_SURFACE_SYNC:
2784 case PACKET3_EVENT_WRITE:
2785 case PACKET3_EVENT_WRITE_EOP:
2786 case PACKET3_EVENT_WRITE_EOS:
2787 case PACKET3_SET_CONTEXT_REG:
2788 case PACKET3_SET_CONTEXT_REG_INDIRECT:
2789 case PACKET3_SET_SH_REG:
2790 case PACKET3_SET_SH_REG_OFFSET:
2791 case PACKET3_INCREMENT_DE_COUNTER:
2792 case PACKET3_WAIT_ON_CE_COUNTER:
2793 case PACKET3_WAIT_ON_AVAIL_BUFFER:
2794 case PACKET3_ME_WRITE:
2795 break;
2796 case PACKET3_COPY_DATA:
2797 if ((idx_value & 0xf00) == 0) {
2798 reg = ib[idx + 3] * 4;
2799 if (!si_vm_reg_valid(reg))
2800 return -EINVAL;
2801 }
2802 break;
2803 case PACKET3_WRITE_DATA:
2804 if ((idx_value & 0xf00) == 0) {
2805 start_reg = ib[idx + 1] * 4;
2806 if (idx_value & 0x10000) {
2807 if (!si_vm_reg_valid(start_reg))
2808 return -EINVAL;
2809 } else {
2810 for (i = 0; i < (pkt->count - 2); i++) {
2811 reg = start_reg + (4 * i);
2812 if (!si_vm_reg_valid(reg))
2813 return -EINVAL;
2814 }
2815 }
2816 }
2817 break;
2818 case PACKET3_COND_WRITE:
2819 if (idx_value & 0x100) {
2820 reg = ib[idx + 5] * 4;
2821 if (!si_vm_reg_valid(reg))
2822 return -EINVAL;
2823 }
2824 break;
2825 case PACKET3_COPY_DW:
2826 if (idx_value & 0x2) {
2827 reg = ib[idx + 3] * 4;
2828 if (!si_vm_reg_valid(reg))
2829 return -EINVAL;
2830 }
2831 break;
2832 default:
2833 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
2834 return -EINVAL;
2835 }
2836 return 0;
2837}
2838
2839int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2840{
2841 int ret = 0;
2842 u32 idx = 0;
2843 struct radeon_cs_packet pkt;
2844
2845 do {
2846 pkt.idx = idx;
2847 pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
2848 pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
2849 pkt.one_reg_wr = 0;
2850 switch (pkt.type) {
2851 case PACKET_TYPE0:
2852 dev_err(rdev->dev, "Packet0 not allowed!\n");
2853 ret = -EINVAL;
2854 break;
2855 case PACKET_TYPE2:
2856 idx += 1;
2857 break;
2858 case PACKET_TYPE3:
2859 pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
2860 if (ib->is_const_ib)
2861 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
2862 else {
2863 switch (ib->fence->ring) {
2864 case RADEON_RING_TYPE_GFX_INDEX:
2865 ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
2866 break;
2867 case CAYMAN_RING_TYPE_CP1_INDEX:
2868 case CAYMAN_RING_TYPE_CP2_INDEX:
2869 ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
2870 break;
2871 default:
2872 dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->fence->ring);
2873 ret = -EINVAL;
2874 break;
2875 }
2876 }
2877 idx += pkt.count + 2;
2878 break;
2879 default:
2880 dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
2881 ret = -EINVAL;
2882 break;
2883 }
2884 if (ret)
2885 break;
2886 } while (idx < ib->length_dw);
2887
2888 return ret;
2889}
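/* Illustrative sketch, not driver code: the PM4 header fields that
 * si_ib_parse() walks. Assuming the usual radeon encoding behind
 * CP_PACKET_GET_TYPE/COUNT and CP_PACKET3_GET_OPCODE, a header dword
 * carries the packet type in bits 31:30, the body length minus one in
 * bits 29:16 and, for type-3 packets, the opcode in bits 15:8; hence
 * the "idx += pkt.count + 2" advance (header plus count + 1 dwords).
 */
static void si_decode_pm4_header(u32 header)
{
    u32 type   = (header >> 30) & 0x3;
    u32 count  = (header >> 16) & 0x3fff;
    u32 opcode = (header >> 8) & 0xff;

    DRM_DEBUG("PM4 type %u, opcode 0x%02x, %u payload dwords\n",
              type, opcode, count + 1);
}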
2890
2891/*
2892 * vm
2893 */
2894int si_vm_init(struct radeon_device *rdev)
2895{
2896 /* number of VMs */
2897 rdev->vm_manager.nvm = 16;
2898 /* base offset of vram pages */
2899 rdev->vm_manager.vram_base_offset = 0;
2900
2901 return 0;
2902}
2903
2904void si_vm_fini(struct radeon_device *rdev)
2905{
2906}
2907
2908int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
2909{
2910 if (id < 8)
2911 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
2912 else
2913 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
2914 vm->pt_gpu_addr >> 12);
2915 /* flush hdp cache */
2916 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2917 /* bits 0-15 are the VM contexts 0-15 */
2918 WREG32(VM_INVALIDATE_REQUEST, 1 << id);
2919 return 0;
2920}
2921
2922void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
2923{
2924 if (vm->id < 8)
2925 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0);
2926 else
2927 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2), 0);
2928 /* flush hdp cache */
2929 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2930 /* bits 0-15 are the VM contexts 0-15 */
2931 WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
2932}
2933
2934void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
2935{
2936 if (vm->id == -1)
2937 return;
2938
2939 /* flush hdp cache */
2940 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2941 /* bits 0-15 are the VM contexts 0-15 */
2942 WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
2943}
2944
2945/*
2946 * RLC
2947 */
2948static void si_rlc_fini(struct radeon_device *rdev)
2949{
2950 int r;
2951
2952 /* save restore block */
2953 if (rdev->rlc.save_restore_obj) {
2954 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
2955 if (unlikely(r != 0))
2956 dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
2957 radeon_bo_unpin(rdev->rlc.save_restore_obj);
2958 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
2959
2960 radeon_bo_unref(&rdev->rlc.save_restore_obj);
2961 rdev->rlc.save_restore_obj = NULL;
2962 }
2963
2964 /* clear state block */
2965 if (rdev->rlc.clear_state_obj) {
2966 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
2967 if (unlikely(r != 0))
2968 dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
2969 radeon_bo_unpin(rdev->rlc.clear_state_obj);
2970 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
2971
2972 radeon_bo_unref(&rdev->rlc.clear_state_obj);
2973 rdev->rlc.clear_state_obj = NULL;
2974 }
2975}
2976
2977static int si_rlc_init(struct radeon_device *rdev)
2978{
2979 int r;
2980
2981 /* save restore block */
2982 if (rdev->rlc.save_restore_obj == NULL) {
2983 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
2984 RADEON_GEM_DOMAIN_VRAM, &rdev->rlc.save_restore_obj);
2985 if (r) {
2986 dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
2987 return r;
2988 }
2989 }
2990
2991 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
2992 if (unlikely(r != 0)) {
2993 si_rlc_fini(rdev);
2994 return r;
2995 }
2996 r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
2997 &rdev->rlc.save_restore_gpu_addr);
2998 if (r) {
2999 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
3000 dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
3001 si_rlc_fini(rdev);
3002 return r;
3003 }
3004
3005 /* clear state block */
3006 if (rdev->rlc.clear_state_obj == NULL) {
3007 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
3008 RADEON_GEM_DOMAIN_VRAM, &rdev->rlc.clear_state_obj);
3009 if (r) {
3010 dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
3011 si_rlc_fini(rdev);
3012 return r;
3013 }
3014 }
3015 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
3016 if (unlikely(r != 0)) {
3017 si_rlc_fini(rdev);
3018 return r;
3019 }
3020 r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
3021 &rdev->rlc.clear_state_gpu_addr);
3022 if (r) {
3024 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
3025 dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
3026 si_rlc_fini(rdev);
3027 return r;
3028 }
3029
3030 return 0;
3031}
3032
3033static void si_rlc_stop(struct radeon_device *rdev)
3034{
3035 WREG32(RLC_CNTL, 0);
3036}
3037
3038static void si_rlc_start(struct radeon_device *rdev)
3039{
3040 WREG32(RLC_CNTL, RLC_ENABLE);
3041}
3042
3043static int si_rlc_resume(struct radeon_device *rdev)
3044{
3045 u32 i;
3046 const __be32 *fw_data;
3047
3048 if (!rdev->rlc_fw)
3049 return -EINVAL;
3050
3051 si_rlc_stop(rdev);
3052
3053 WREG32(RLC_RL_BASE, 0);
3054 WREG32(RLC_RL_SIZE, 0);
3055 WREG32(RLC_LB_CNTL, 0);
3056 WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
3057 WREG32(RLC_LB_CNTR_INIT, 0);
3058
3059 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
3060 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
3061
3062 WREG32(RLC_MC_CNTL, 0);
3063 WREG32(RLC_UCODE_CNTL, 0);
3064
3065 fw_data = (const __be32 *)rdev->rlc_fw->data;
3066 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
3067 WREG32(RLC_UCODE_ADDR, i);
3068 WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
3069 }
3070 WREG32(RLC_UCODE_ADDR, 0);
3071
3072 si_rlc_start(rdev);
3073
3074 return 0;
3075}
3076
3077static void si_enable_interrupts(struct radeon_device *rdev)
3078{
3079 u32 ih_cntl = RREG32(IH_CNTL);
3080 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3081
3082 ih_cntl |= ENABLE_INTR;
3083 ih_rb_cntl |= IH_RB_ENABLE;
3084 WREG32(IH_CNTL, ih_cntl);
3085 WREG32(IH_RB_CNTL, ih_rb_cntl);
3086 rdev->ih.enabled = true;
3087}
3088
3089static void si_disable_interrupts(struct radeon_device *rdev)
3090{
3091 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3092 u32 ih_cntl = RREG32(IH_CNTL);
3093
3094 ih_rb_cntl &= ~IH_RB_ENABLE;
3095 ih_cntl &= ~ENABLE_INTR;
3096 WREG32(IH_RB_CNTL, ih_rb_cntl);
3097 WREG32(IH_CNTL, ih_cntl);
3098 /* set rptr, wptr to 0 */
3099 WREG32(IH_RB_RPTR, 0);
3100 WREG32(IH_RB_WPTR, 0);
3101 rdev->ih.enabled = false;
3102 rdev->ih.wptr = 0;
3103 rdev->ih.rptr = 0;
3104}
3105
3106static void si_disable_interrupt_state(struct radeon_device *rdev)
3107{
3108 u32 tmp;
3109
3110 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3111 WREG32(CP_INT_CNTL_RING1, 0);
3112 WREG32(CP_INT_CNTL_RING2, 0);
3113 WREG32(GRBM_INT_CNTL, 0);
3114 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3115 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3116 if (rdev->num_crtc >= 4) {
3117 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3118 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3119 }
3120 if (rdev->num_crtc >= 6) {
3121 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3122 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3123 }
3124
3125 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3126 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3127 if (rdev->num_crtc >= 4) {
3128 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3129 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3130 }
3131 if (rdev->num_crtc >= 6) {
3132 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3133 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3134 }
3135
3136 WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
3137
3138 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3139 WREG32(DC_HPD1_INT_CONTROL, tmp);
3140 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3141 WREG32(DC_HPD2_INT_CONTROL, tmp);
3142 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3143 WREG32(DC_HPD3_INT_CONTROL, tmp);
3144 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3145 WREG32(DC_HPD4_INT_CONTROL, tmp);
3146 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3147 WREG32(DC_HPD5_INT_CONTROL, tmp);
3148 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3149 WREG32(DC_HPD6_INT_CONTROL, tmp);
3150
3151}
3152
3153static int si_irq_init(struct radeon_device *rdev)
3154{
3155 int ret = 0;
3156 int rb_bufsz;
3157 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3158
3159 /* allocate ring */
3160 ret = r600_ih_ring_alloc(rdev);
3161 if (ret)
3162 return ret;
3163
3164 /* disable irqs */
3165 si_disable_interrupts(rdev);
3166
3167 /* init rlc */
3168 ret = si_rlc_resume(rdev);
3169 if (ret) {
3170 r600_ih_ring_fini(rdev);
3171 return ret;
3172 }
3173
3174 /* setup interrupt control */
3175 /* set dummy read address to ring address */
3176 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3177 interrupt_cntl = RREG32(INTERRUPT_CNTL);
3178 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with MSI, enabled without MSI
3179 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3180 */
3181 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3182 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3183 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3184 WREG32(INTERRUPT_CNTL, interrupt_cntl);
3185
3186 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3187 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3188
3189 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3190 IH_WPTR_OVERFLOW_CLEAR |
3191 (rb_bufsz << 1));
3192
3193 if (rdev->wb.enabled)
3194 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3195
3196 /* set the writeback address whether it's enabled or not */
3197 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3198 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
3199
3200 WREG32(IH_RB_CNTL, ih_rb_cntl);
3201
3202 /* set rptr, wptr to 0 */
3203 WREG32(IH_RB_RPTR, 0);
3204 WREG32(IH_RB_WPTR, 0);
3205
3206 /* Default settings for IH_CNTL (disabled at first) */
3207 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3208 /* RPTR_REARM only works if MSIs are enabled */
3209 if (rdev->msi_enabled)
3210 ih_cntl |= RPTR_REARM;
3211 WREG32(IH_CNTL, ih_cntl);
3212
3213 /* force the active interrupt state to all disabled */
3214 si_disable_interrupt_state(rdev);
3215
3216 /* enable irqs */
3217 si_enable_interrupts(rdev);
3218
3219 return ret;
3220}
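/* Worked example, illustrative only: with the default 64K IH ring,
 * rb_bufsz = drm_order(65536 / 4) = 14, so the size field written into
 * IH_RB_CNTL above is 14 << 1. Assuming the usual 16-byte IH vectors,
 * such a ring holds 65536 / 16 = 4096 interrupt entries before the
 * overflow handling enabled above kicks in.
 */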
3221
3222int si_irq_set(struct radeon_device *rdev)
3223{
3224 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
3225 u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
3226 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3227 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3228 u32 grbm_int_cntl = 0;
3229 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
3230
3231 if (!rdev->irq.installed) {
3232 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
3233 return -EINVAL;
3234 }
3235 /* don't enable anything if the ih is disabled */
3236 if (!rdev->ih.enabled) {
3237 si_disable_interrupts(rdev);
3238 /* force the active interrupt state to all disabled */
3239 si_disable_interrupt_state(rdev);
3240 return 0;
3241 }
3242
3243 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
3244 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
3245 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
3246 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
3247 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3248 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3249
3250 /* enable CP interrupts on all rings */
3251 if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
3252 DRM_DEBUG("si_irq_set: sw int gfx\n");
3253 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3254 }
3255 if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) {
3256 DRM_DEBUG("si_irq_set: sw int cp1\n");
3257 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
3258 }
3259 if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) {
3260 DRM_DEBUG("si_irq_set: sw int cp2\n");
3261 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
3262 }
3263 if (rdev->irq.crtc_vblank_int[0] ||
3264 rdev->irq.pflip[0]) {
3265 DRM_DEBUG("si_irq_set: vblank 0\n");
3266 crtc1 |= VBLANK_INT_MASK;
3267 }
3268 if (rdev->irq.crtc_vblank_int[1] ||
3269 rdev->irq.pflip[1]) {
3270 DRM_DEBUG("si_irq_set: vblank 1\n");
3271 crtc2 |= VBLANK_INT_MASK;
3272 }
3273 if (rdev->irq.crtc_vblank_int[2] ||
3274 rdev->irq.pflip[2]) {
3275 DRM_DEBUG("si_irq_set: vblank 2\n");
3276 crtc3 |= VBLANK_INT_MASK;
3277 }
3278 if (rdev->irq.crtc_vblank_int[3] ||
3279 rdev->irq.pflip[3]) {
3280 DRM_DEBUG("si_irq_set: vblank 3\n");
3281 crtc4 |= VBLANK_INT_MASK;
3282 }
3283 if (rdev->irq.crtc_vblank_int[4] ||
3284 rdev->irq.pflip[4]) {
3285 DRM_DEBUG("si_irq_set: vblank 4\n");
3286 crtc5 |= VBLANK_INT_MASK;
3287 }
3288 if (rdev->irq.crtc_vblank_int[5] ||
3289 rdev->irq.pflip[5]) {
3290 DRM_DEBUG("si_irq_set: vblank 5\n");
3291 crtc6 |= VBLANK_INT_MASK;
3292 }
3293 if (rdev->irq.hpd[0]) {
3294 DRM_DEBUG("si_irq_set: hpd 1\n");
3295 hpd1 |= DC_HPDx_INT_EN;
3296 }
3297 if (rdev->irq.hpd[1]) {
3298 DRM_DEBUG("si_irq_set: hpd 2\n");
3299 hpd2 |= DC_HPDx_INT_EN;
3300 }
3301 if (rdev->irq.hpd[2]) {
3302 DRM_DEBUG("si_irq_set: hpd 3\n");
3303 hpd3 |= DC_HPDx_INT_EN;
3304 }
3305 if (rdev->irq.hpd[3]) {
3306 DRM_DEBUG("si_irq_set: hpd 4\n");
3307 hpd4 |= DC_HPDx_INT_EN;
3308 }
3309 if (rdev->irq.hpd[4]) {
3310 DRM_DEBUG("si_irq_set: hpd 5\n");
3311 hpd5 |= DC_HPDx_INT_EN;
3312 }
3313 if (rdev->irq.hpd[5]) {
3314 DRM_DEBUG("si_irq_set: hpd 6\n");
3315 hpd6 |= DC_HPDx_INT_EN;
3316 }
3317 if (rdev->irq.gui_idle) {
3318 DRM_DEBUG("gui idle\n");
3319 grbm_int_cntl |= GUI_IDLE_INT_ENABLE;
3320 }
3321
3322 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3323 WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
3324 WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
3325
3326 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3327
3328 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
3329 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
3330 if (rdev->num_crtc >= 4) {
3331 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
3332 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
3333 }
3334 if (rdev->num_crtc >= 6) {
3335 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
3336 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
3337 }
3338
3339 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
3340 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
3341 if (rdev->num_crtc >= 4) {
3342 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
3343 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
3344 }
3345 if (rdev->num_crtc >= 6) {
3346 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
3347 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
3348 }
3349
3350 WREG32(DC_HPD1_INT_CONTROL, hpd1);
3351 WREG32(DC_HPD2_INT_CONTROL, hpd2);
3352 WREG32(DC_HPD3_INT_CONTROL, hpd3);
3353 WREG32(DC_HPD4_INT_CONTROL, hpd4);
3354 WREG32(DC_HPD5_INT_CONTROL, hpd5);
3355 WREG32(DC_HPD6_INT_CONTROL, hpd6);
3356
3357 return 0;
3358}
3359
3360static inline void si_irq_ack(struct radeon_device *rdev)
3361{
3362 u32 tmp;
3363
3364 rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3365 rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3366 rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3367 rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3368 rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3369 rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3370 rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
3371 rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
3372 if (rdev->num_crtc >= 4) {
3373 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
3374 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
3375 }
3376 if (rdev->num_crtc >= 6) {
3377 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
3378 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
3379 }
3380
3381 if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
3382 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3383 if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
3384 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3385 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
3386 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
3387 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
3388 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
3389 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3390 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
3391 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3392 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
3393
3394 if (rdev->num_crtc >= 4) {
3395 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
3396 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3397 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
3398 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3399 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3400 WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
3401 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3402 WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
3403 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3404 WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
3405 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3406 WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
3407 }
3408
3409 if (rdev->num_crtc >= 6) {
3410 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
3411 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3412 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
3413 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3414 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3415 WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
3416 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3417 WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
3418 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3419 WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
3420 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3421 WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
3422 }
3423
3424 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3425 tmp = RREG32(DC_HPD1_INT_CONTROL);
3426 tmp |= DC_HPDx_INT_ACK;
3427 WREG32(DC_HPD1_INT_CONTROL, tmp);
3428 }
3429 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3430 tmp = RREG32(DC_HPD2_INT_CONTROL);
3431 tmp |= DC_HPDx_INT_ACK;
3432 WREG32(DC_HPD2_INT_CONTROL, tmp);
3433 }
3434 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3435 tmp = RREG32(DC_HPD3_INT_CONTROL);
3436 tmp |= DC_HPDx_INT_ACK;
3437 WREG32(DC_HPD3_INT_CONTROL, tmp);
3438 }
3439 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3440 tmp = RREG32(DC_HPD4_INT_CONTROL);
3441 tmp |= DC_HPDx_INT_ACK;
3442 WREG32(DC_HPD4_INT_CONTROL, tmp);
3443 }
3444 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3445 tmp = RREG32(DC_HPD5_INT_CONTROL);
3446 tmp |= DC_HPDx_INT_ACK;
3447 WREG32(DC_HPD5_INT_CONTROL, tmp);
3448 }
3449 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3450 		tmp = RREG32(DC_HPD6_INT_CONTROL);
3451 tmp |= DC_HPDx_INT_ACK;
3452 WREG32(DC_HPD6_INT_CONTROL, tmp);
3453 }
3454}
3455
3456static void si_irq_disable(struct radeon_device *rdev)
3457{
3458 si_disable_interrupts(rdev);
3459 /* Wait and acknowledge irq */
3460 mdelay(1);
3461 si_irq_ack(rdev);
3462 si_disable_interrupt_state(rdev);
3463}
3464
3465static void si_irq_suspend(struct radeon_device *rdev)
3466{
3467 si_irq_disable(rdev);
3468 si_rlc_stop(rdev);
3469}
3470
3471static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
3472{
3473 u32 wptr, tmp;
3474
3475 if (rdev->wb.enabled)
3476 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3477 else
3478 wptr = RREG32(IH_RB_WPTR);
3479
3480 if (wptr & RB_OVERFLOW) {
3481 		/* When a ring buffer overflow happens, start parsing interrupts
3482 		 * from the last vector that was not overwritten (wptr + 16).
3483 		 * Hopefully this should allow us to catch up.
3484 		 */
3485 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
3486 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
3487 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
3488 tmp = RREG32(IH_RB_CNTL);
3489 tmp |= IH_WPTR_OVERFLOW_CLEAR;
3490 WREG32(IH_RB_CNTL, tmp);
3491 }
3492 return (wptr & rdev->ih.ptr_mask);
3493}
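/* Worked example of the overflow catch-up above (illustrative numbers,
 * assuming a 64 KiB IH ring, i.e. ptr_mask == 0xffff): each IV entry is
 * 16 bytes, so if the hardware laps the reader and reports an overflowed
 * wptr of 0x0120, the slot at 0x0120 is the next one to be clobbered and
 * parsing resumes one entry later, at (0x0120 + 16) & 0xffff == 0x0130,
 * the last vector not overwritten.
 */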
3494
3495/* SI IV Ring
3496 * Each IV ring entry is 128 bits:
3497 * [7:0] - interrupt source id
3498 * [31:8] - reserved
3499 * [59:32] - interrupt source data
3500 * [63:60] - reserved
3501 * [71:64] - RINGID
3502 * [79:72] - VMID
3503 * [127:80] - reserved
3504 */
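/* A minimal sketch (illustrative only, not used by the driver) of how one
 * such entry could be unpacked from the four little-endian dwords that make
 * up the 128-bit vector; si_irq_process() below does the equivalent decoding
 * inline.  The struct and helper names here are hypothetical.
 */
struct si_iv_entry {
	u32 src_id;	/* [7:0] */
	u32 src_data;	/* [59:32] */
	u32 ring_id;	/* [71:64] */
	u32 vm_id;	/* [79:72] */
};

static inline void si_decode_iv_entry(volatile u32 *ring, u32 ring_index,
				      struct si_iv_entry *iv)
{
	iv->src_id   = le32_to_cpu(ring[ring_index + 0]) & 0xff;
	iv->src_data = le32_to_cpu(ring[ring_index + 1]) & 0xfffffff;
	iv->ring_id  = le32_to_cpu(ring[ring_index + 2]) & 0xff;
	iv->vm_id    = (le32_to_cpu(ring[ring_index + 2]) >> 8) & 0xff;
}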
3505int si_irq_process(struct radeon_device *rdev)
3506{
3507 u32 wptr;
3508 u32 rptr;
3509 u32 src_id, src_data, ring_id;
3510 u32 ring_index;
3511 unsigned long flags;
3512 bool queue_hotplug = false;
3513
3514 if (!rdev->ih.enabled || rdev->shutdown)
3515 return IRQ_NONE;
3516
3517 wptr = si_get_ih_wptr(rdev);
3518 rptr = rdev->ih.rptr;
3519 DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3520
3521 spin_lock_irqsave(&rdev->ih.lock, flags);
3522 if (rptr == wptr) {
3523 spin_unlock_irqrestore(&rdev->ih.lock, flags);
3524 return IRQ_NONE;
3525 }
3526restart_ih:
3527 /* Order reading of wptr vs. reading of IH ring data */
3528 rmb();
3529
3530 /* display interrupts */
3531 si_irq_ack(rdev);
3532
3533 rdev->ih.wptr = wptr;
3534 while (rptr != wptr) {
3535 /* wptr/rptr are in bytes! */
3536 ring_index = rptr / 4;
3537 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3538 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3539 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3540
3541 switch (src_id) {
3542 case 1: /* D1 vblank/vline */
3543 switch (src_data) {
3544 case 0: /* D1 vblank */
3545 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
3546 if (rdev->irq.crtc_vblank_int[0]) {
3547 drm_handle_vblank(rdev->ddev, 0);
3548 rdev->pm.vblank_sync = true;
3549 wake_up(&rdev->irq.vblank_queue);
3550 }
3551 if (rdev->irq.pflip[0])
3552 radeon_crtc_handle_flip(rdev, 0);
3553 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
3554 DRM_DEBUG("IH: D1 vblank\n");
3555 }
3556 break;
3557 case 1: /* D1 vline */
3558 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
3559 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
3560 DRM_DEBUG("IH: D1 vline\n");
3561 }
3562 break;
3563 default:
3564 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3565 break;
3566 }
3567 break;
3568 case 2: /* D2 vblank/vline */
3569 switch (src_data) {
3570 case 0: /* D2 vblank */
3571 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
3572 if (rdev->irq.crtc_vblank_int[1]) {
3573 drm_handle_vblank(rdev->ddev, 1);
3574 rdev->pm.vblank_sync = true;
3575 wake_up(&rdev->irq.vblank_queue);
3576 }
3577 if (rdev->irq.pflip[1])
3578 radeon_crtc_handle_flip(rdev, 1);
3579 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
3580 DRM_DEBUG("IH: D2 vblank\n");
3581 }
3582 break;
3583 case 1: /* D2 vline */
3584 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
3585 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
3586 DRM_DEBUG("IH: D2 vline\n");
3587 }
3588 break;
3589 default:
3590 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3591 break;
3592 }
3593 break;
3594 case 3: /* D3 vblank/vline */
3595 switch (src_data) {
3596 case 0: /* D3 vblank */
3597 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
3598 if (rdev->irq.crtc_vblank_int[2]) {
3599 drm_handle_vblank(rdev->ddev, 2);
3600 rdev->pm.vblank_sync = true;
3601 wake_up(&rdev->irq.vblank_queue);
3602 }
3603 if (rdev->irq.pflip[2])
3604 radeon_crtc_handle_flip(rdev, 2);
3605 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
3606 DRM_DEBUG("IH: D3 vblank\n");
3607 }
3608 break;
3609 case 1: /* D3 vline */
3610 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
3611 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
3612 DRM_DEBUG("IH: D3 vline\n");
3613 }
3614 break;
3615 default:
3616 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3617 break;
3618 }
3619 break;
3620 case 4: /* D4 vblank/vline */
3621 switch (src_data) {
3622 case 0: /* D4 vblank */
3623 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
3624 if (rdev->irq.crtc_vblank_int[3]) {
3625 drm_handle_vblank(rdev->ddev, 3);
3626 rdev->pm.vblank_sync = true;
3627 wake_up(&rdev->irq.vblank_queue);
3628 }
3629 if (rdev->irq.pflip[3])
3630 radeon_crtc_handle_flip(rdev, 3);
3631 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
3632 DRM_DEBUG("IH: D4 vblank\n");
3633 }
3634 break;
3635 case 1: /* D4 vline */
3636 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
3637 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
3638 DRM_DEBUG("IH: D4 vline\n");
3639 }
3640 break;
3641 default:
3642 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3643 break;
3644 }
3645 break;
3646 case 5: /* D5 vblank/vline */
3647 switch (src_data) {
3648 case 0: /* D5 vblank */
3649 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
3650 if (rdev->irq.crtc_vblank_int[4]) {
3651 drm_handle_vblank(rdev->ddev, 4);
3652 rdev->pm.vblank_sync = true;
3653 wake_up(&rdev->irq.vblank_queue);
3654 }
3655 if (rdev->irq.pflip[4])
3656 radeon_crtc_handle_flip(rdev, 4);
3657 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
3658 DRM_DEBUG("IH: D5 vblank\n");
3659 }
3660 break;
3661 case 1: /* D5 vline */
3662 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
3663 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
3664 DRM_DEBUG("IH: D5 vline\n");
3665 }
3666 break;
3667 default:
3668 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3669 break;
3670 }
3671 break;
3672 case 6: /* D6 vblank/vline */
3673 switch (src_data) {
3674 case 0: /* D6 vblank */
3675 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
3676 if (rdev->irq.crtc_vblank_int[5]) {
3677 drm_handle_vblank(rdev->ddev, 5);
3678 rdev->pm.vblank_sync = true;
3679 wake_up(&rdev->irq.vblank_queue);
3680 }
3681 if (rdev->irq.pflip[5])
3682 radeon_crtc_handle_flip(rdev, 5);
3683 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
3684 DRM_DEBUG("IH: D6 vblank\n");
3685 }
3686 break;
3687 case 1: /* D6 vline */
3688 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
3689 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
3690 DRM_DEBUG("IH: D6 vline\n");
3691 }
3692 break;
3693 default:
3694 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3695 break;
3696 }
3697 break;
3698 case 42: /* HPD hotplug */
3699 switch (src_data) {
3700 case 0:
3701 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3702 rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
3703 queue_hotplug = true;
3704 DRM_DEBUG("IH: HPD1\n");
3705 }
3706 break;
3707 case 1:
3708 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3709 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
3710 queue_hotplug = true;
3711 DRM_DEBUG("IH: HPD2\n");
3712 }
3713 break;
3714 case 2:
3715 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3716 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
3717 queue_hotplug = true;
3718 DRM_DEBUG("IH: HPD3\n");
3719 }
3720 break;
3721 case 3:
3722 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3723 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
3724 queue_hotplug = true;
3725 DRM_DEBUG("IH: HPD4\n");
3726 }
3727 break;
3728 case 4:
3729 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3730 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
3731 queue_hotplug = true;
3732 DRM_DEBUG("IH: HPD5\n");
3733 }
3734 break;
3735 case 5:
3736 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3737 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
3738 queue_hotplug = true;
3739 DRM_DEBUG("IH: HPD6\n");
3740 }
3741 break;
3742 default:
3743 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3744 break;
3745 }
3746 break;
3747 case 176: /* RINGID0 CP_INT */
3748 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3749 break;
3750 case 177: /* RINGID1 CP_INT */
3751 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3752 break;
3753 case 178: /* RINGID2 CP_INT */
3754 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3755 break;
3756 case 181: /* CP EOP event */
3757 DRM_DEBUG("IH: CP EOP\n");
3758 switch (ring_id) {
3759 case 0:
3760 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3761 break;
3762 case 1:
3763 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3764 break;
3765 case 2:
3766 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3767 break;
3768 }
3769 break;
3770 case 233: /* GUI IDLE */
3771 DRM_DEBUG("IH: GUI idle\n");
3772 rdev->pm.gui_idle = true;
3773 wake_up(&rdev->irq.idle_queue);
3774 break;
3775 default:
3776 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3777 break;
3778 }
3779
3780 /* wptr/rptr are in bytes! */
3781 rptr += 16;
3782 rptr &= rdev->ih.ptr_mask;
3783 }
3784 /* make sure wptr hasn't changed while processing */
3785 wptr = si_get_ih_wptr(rdev);
3786 if (wptr != rdev->ih.wptr)
3787 goto restart_ih;
3788 if (queue_hotplug)
3789 schedule_work(&rdev->hotplug_work);
3790 rdev->ih.rptr = rptr;
3791 WREG32(IH_RB_RPTR, rdev->ih.rptr);
3792 spin_unlock_irqrestore(&rdev->ih.lock, flags);
3793 return IRQ_HANDLED;
3794}
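/* How this handler gets invoked (an assumption, mirroring the wiring used
 * by the earlier radeon ASICs rather than anything added in this patch):
 * the DRM interrupt handler forwards to the per-ASIC process callback,
 * roughly:
 *
 *	irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS)
 *	{
 *		struct drm_device *dev = (struct drm_device *) arg;
 *		struct radeon_device *rdev = dev->dev_private;
 *
 *		return radeon_irq_process(rdev);
 *	}
 *
 * where radeon_irq_process() resolves to rdev->asic->irq.process, i.e.
 * si_irq_process() for SI parts once the asic table points at it.
 */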
3795