drm/radeon/kms: add support for MC ucode loading on SI
[linux-2.6-block.git] / drivers / gpu / drm / radeon / si.c
CommitLineData
43b3cd99
AD
1/*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
0f0de06c
AD
24#include <linux/firmware.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/module.h>
43b3cd99
AD
28#include "drmP.h"
29#include "radeon.h"
30#include "radeon_asic.h"
31#include "radeon_drm.h"
32#include "sid.h"
33#include "atom.h"
34
0f0de06c
AD
/* ucode sizes in dwords (si_init_microcode() multiplies by 4 for bytes) */
#define SI_PFP_UCODE_SIZE 2144
#define SI_PM4_UCODE_SIZE 2144
#define SI_CE_UCODE_SIZE 2144
#define SI_RLC_UCODE_SIZE 2048
#define SI_MC_UCODE_SIZE 7769

/* firmware images fetched via request_firmware() in si_init_microcode() */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
56
0a96d72b 57extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
c476dde2
AD
58extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
59extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
0a96d72b 60
1bd47d2e
AD
61/* get temperature in millidegrees */
62int si_get_temp(struct radeon_device *rdev)
63{
64 u32 temp;
65 int actual_temp = 0;
66
67 temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
68 CTF_TEMP_SHIFT;
69
70 if (temp & 0x200)
71 actual_temp = 255;
72 else
73 actual_temp = temp & 0x1ff;
74
75 actual_temp = (actual_temp * 1000);
76
77 return actual_temp;
78}
79
8b074dd6
AD
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * MC (memory controller) io register init tables: { MC io register index,
 * value } pairs written through MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA
 * by si_mc_load_microcode() before the MC ucode itself is loaded.
 * The three per-chip tables are identical except for the value written to
 * index 0x9f (last entry).
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
198
199/* ucode loading */
200static int si_mc_load_microcode(struct radeon_device *rdev)
201{
202 const __be32 *fw_data;
203 u32 running, blackout = 0;
204 u32 *io_mc_regs;
205 int i, ucode_size, regs_size;
206
207 if (!rdev->mc_fw)
208 return -EINVAL;
209
210 switch (rdev->family) {
211 case CHIP_TAHITI:
212 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
213 ucode_size = SI_MC_UCODE_SIZE;
214 regs_size = TAHITI_IO_MC_REGS_SIZE;
215 break;
216 case CHIP_PITCAIRN:
217 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
218 ucode_size = SI_MC_UCODE_SIZE;
219 regs_size = TAHITI_IO_MC_REGS_SIZE;
220 break;
221 case CHIP_VERDE:
222 default:
223 io_mc_regs = (u32 *)&verde_io_mc_regs;
224 ucode_size = SI_MC_UCODE_SIZE;
225 regs_size = TAHITI_IO_MC_REGS_SIZE;
226 break;
227 }
228
229 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
230
231 if (running == 0) {
232 if (running) {
233 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
234 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
235 }
236
237 /* reset the engine and set to writable */
238 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
239 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
240
241 /* load mc io regs */
242 for (i = 0; i < regs_size; i++) {
243 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
244 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
245 }
246 /* load the MC ucode */
247 fw_data = (const __be32 *)rdev->mc_fw->data;
248 for (i = 0; i < ucode_size; i++)
249 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
250
251 /* put the engine back into the active state */
252 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
253 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
254 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
255
256 /* wait for training to complete */
257 for (i = 0; i < rdev->usec_timeout; i++) {
258 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
259 break;
260 udelay(1);
261 }
262 for (i = 0; i < rdev->usec_timeout; i++) {
263 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
264 break;
265 udelay(1);
266 }
267
268 if (running)
269 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
270 }
271
272 return 0;
273}
274
0f0de06c
AD
275static int si_init_microcode(struct radeon_device *rdev)
276{
277 struct platform_device *pdev;
278 const char *chip_name;
279 const char *rlc_chip_name;
280 size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
281 char fw_name[30];
282 int err;
283
284 DRM_DEBUG("\n");
285
286 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
287 err = IS_ERR(pdev);
288 if (err) {
289 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
290 return -EINVAL;
291 }
292
293 switch (rdev->family) {
294 case CHIP_TAHITI:
295 chip_name = "TAHITI";
296 rlc_chip_name = "TAHITI";
297 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
298 me_req_size = SI_PM4_UCODE_SIZE * 4;
299 ce_req_size = SI_CE_UCODE_SIZE * 4;
300 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
301 mc_req_size = SI_MC_UCODE_SIZE * 4;
302 break;
303 case CHIP_PITCAIRN:
304 chip_name = "PITCAIRN";
305 rlc_chip_name = "PITCAIRN";
306 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
307 me_req_size = SI_PM4_UCODE_SIZE * 4;
308 ce_req_size = SI_CE_UCODE_SIZE * 4;
309 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
310 mc_req_size = SI_MC_UCODE_SIZE * 4;
311 break;
312 case CHIP_VERDE:
313 chip_name = "VERDE";
314 rlc_chip_name = "VERDE";
315 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
316 me_req_size = SI_PM4_UCODE_SIZE * 4;
317 ce_req_size = SI_CE_UCODE_SIZE * 4;
318 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
319 mc_req_size = SI_MC_UCODE_SIZE * 4;
320 break;
321 default: BUG();
322 }
323
324 DRM_INFO("Loading %s Microcode\n", chip_name);
325
326 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
327 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
328 if (err)
329 goto out;
330 if (rdev->pfp_fw->size != pfp_req_size) {
331 printk(KERN_ERR
332 "si_cp: Bogus length %zu in firmware \"%s\"\n",
333 rdev->pfp_fw->size, fw_name);
334 err = -EINVAL;
335 goto out;
336 }
337
338 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
339 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
340 if (err)
341 goto out;
342 if (rdev->me_fw->size != me_req_size) {
343 printk(KERN_ERR
344 "si_cp: Bogus length %zu in firmware \"%s\"\n",
345 rdev->me_fw->size, fw_name);
346 err = -EINVAL;
347 }
348
349 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
350 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
351 if (err)
352 goto out;
353 if (rdev->ce_fw->size != ce_req_size) {
354 printk(KERN_ERR
355 "si_cp: Bogus length %zu in firmware \"%s\"\n",
356 rdev->ce_fw->size, fw_name);
357 err = -EINVAL;
358 }
359
360 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
361 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
362 if (err)
363 goto out;
364 if (rdev->rlc_fw->size != rlc_req_size) {
365 printk(KERN_ERR
366 "si_rlc: Bogus length %zu in firmware \"%s\"\n",
367 rdev->rlc_fw->size, fw_name);
368 err = -EINVAL;
369 }
370
371 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
372 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
373 if (err)
374 goto out;
375 if (rdev->mc_fw->size != mc_req_size) {
376 printk(KERN_ERR
377 "si_mc: Bogus length %zu in firmware \"%s\"\n",
378 rdev->mc_fw->size, fw_name);
379 err = -EINVAL;
380 }
381
382out:
383 platform_device_unregister(pdev);
384
385 if (err) {
386 if (err != -EINVAL)
387 printk(KERN_ERR
388 "si_cp: Failed to load firmware \"%s\"\n",
389 fw_name);
390 release_firmware(rdev->pfp_fw);
391 rdev->pfp_fw = NULL;
392 release_firmware(rdev->me_fw);
393 rdev->me_fw = NULL;
394 release_firmware(rdev->ce_fw);
395 rdev->ce_fw = NULL;
396 release_firmware(rdev->rlc_fw);
397 rdev->rlc_fw = NULL;
398 release_firmware(rdev->mc_fw);
399 rdev->mc_fw = NULL;
400 }
401 return err;
402}
403
43b3cd99
AD
404/* watermark setup */
405static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
406 struct radeon_crtc *radeon_crtc,
407 struct drm_display_mode *mode,
408 struct drm_display_mode *other_mode)
409{
410 u32 tmp;
411 /*
412 * Line Buffer Setup
413 * There are 3 line buffers, each one shared by 2 display controllers.
414 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
415 * the display controllers. The paritioning is done via one of four
416 * preset allocations specified in bits 21:20:
417 * 0 - half lb
418 * 2 - whole lb, other crtc must be disabled
419 */
420 /* this can get tricky if we have two large displays on a paired group
421 * of crtcs. Ideally for multiple large displays we'd assign them to
422 * non-linked crtcs for maximum line buffer allocation.
423 */
424 if (radeon_crtc->base.enabled && mode) {
425 if (other_mode)
426 tmp = 0; /* 1/2 */
427 else
428 tmp = 2; /* whole */
429 } else
430 tmp = 0;
431
432 WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
433 DC_LB_MEMORY_CONFIG(tmp));
434
435 if (radeon_crtc->base.enabled && mode) {
436 switch (tmp) {
437 case 0:
438 default:
439 return 4096 * 2;
440 case 2:
441 return 8192 * 2;
442 }
443 }
444
445 /* controller not enabled, so no lb used */
446 return 0;
447}
448
449static u32 dce6_get_number_of_dram_channels(struct radeon_device *rdev)
450{
451 u32 tmp = RREG32(MC_SHARED_CHMAP);
452
453 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
454 case 0:
455 default:
456 return 1;
457 case 1:
458 return 2;
459 case 2:
460 return 4;
461 case 3:
462 return 8;
463 case 4:
464 return 3;
465 case 5:
466 return 6;
467 case 6:
468 return 10;
469 case 7:
470 return 12;
471 case 8:
472 return 16;
473 }
474}
475
/*
 * Inputs to the DCE6 display watermark calculations, filled in by
 * dce6_program_watermarks() from the current mode and power state.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};
491
492static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
493{
494 /* Calculate raw DRAM Bandwidth */
495 fixed20_12 dram_efficiency; /* 0.7 */
496 fixed20_12 yclk, dram_channels, bandwidth;
497 fixed20_12 a;
498
499 a.full = dfixed_const(1000);
500 yclk.full = dfixed_const(wm->yclk);
501 yclk.full = dfixed_div(yclk, a);
502 dram_channels.full = dfixed_const(wm->dram_channels * 4);
503 a.full = dfixed_const(10);
504 dram_efficiency.full = dfixed_const(7);
505 dram_efficiency.full = dfixed_div(dram_efficiency, a);
506 bandwidth.full = dfixed_mul(dram_channels, yclk);
507 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
508
509 return dfixed_trunc(bandwidth);
510}
511
512static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
513{
514 /* Calculate DRAM Bandwidth and the part allocated to display. */
515 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
516 fixed20_12 yclk, dram_channels, bandwidth;
517 fixed20_12 a;
518
519 a.full = dfixed_const(1000);
520 yclk.full = dfixed_const(wm->yclk);
521 yclk.full = dfixed_div(yclk, a);
522 dram_channels.full = dfixed_const(wm->dram_channels * 4);
523 a.full = dfixed_const(10);
524 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
525 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
526 bandwidth.full = dfixed_mul(dram_channels, yclk);
527 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
528
529 return dfixed_trunc(bandwidth);
530}
531
532static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
533{
534 /* Calculate the display Data return Bandwidth */
535 fixed20_12 return_efficiency; /* 0.8 */
536 fixed20_12 sclk, bandwidth;
537 fixed20_12 a;
538
539 a.full = dfixed_const(1000);
540 sclk.full = dfixed_const(wm->sclk);
541 sclk.full = dfixed_div(sclk, a);
542 a.full = dfixed_const(10);
543 return_efficiency.full = dfixed_const(8);
544 return_efficiency.full = dfixed_div(return_efficiency, a);
545 a.full = dfixed_const(32);
546 bandwidth.full = dfixed_mul(a, sclk);
547 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
548
549 return dfixed_trunc(bandwidth);
550}
551
552static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
553{
554 return 32;
555}
556
557static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
558{
559 /* Calculate the DMIF Request Bandwidth */
560 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
561 fixed20_12 disp_clk, sclk, bandwidth;
562 fixed20_12 a, b1, b2;
563 u32 min_bandwidth;
564
565 a.full = dfixed_const(1000);
566 disp_clk.full = dfixed_const(wm->disp_clk);
567 disp_clk.full = dfixed_div(disp_clk, a);
568 a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
569 b1.full = dfixed_mul(a, disp_clk);
570
571 a.full = dfixed_const(1000);
572 sclk.full = dfixed_const(wm->sclk);
573 sclk.full = dfixed_div(sclk, a);
574 a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
575 b2.full = dfixed_mul(a, sclk);
576
577 a.full = dfixed_const(10);
578 disp_clk_request_efficiency.full = dfixed_const(8);
579 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
580
581 min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
582
583 a.full = dfixed_const(min_bandwidth);
584 bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
585
586 return dfixed_trunc(bandwidth);
587}
588
589static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
590{
591 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
592 u32 dram_bandwidth = dce6_dram_bandwidth(wm);
593 u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
594 u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
595
596 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
597}
598
599static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
600{
601 /* Calculate the display mode Average Bandwidth
602 * DisplayMode should contain the source and destination dimensions,
603 * timing, etc.
604 */
605 fixed20_12 bpp;
606 fixed20_12 line_time;
607 fixed20_12 src_width;
608 fixed20_12 bandwidth;
609 fixed20_12 a;
610
611 a.full = dfixed_const(1000);
612 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
613 line_time.full = dfixed_div(line_time, a);
614 bpp.full = dfixed_const(wm->bytes_per_pixel);
615 src_width.full = dfixed_const(wm->src_width);
616 bandwidth.full = dfixed_mul(src_width, bpp);
617 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
618 bandwidth.full = dfixed_div(bandwidth, line_time);
619
620 return dfixed_trunc(bandwidth);
621}
622
/*
 * dce6_latency_watermark - worst-case latency (in ns) the line buffer
 * must hide for this head, plus any extra line fill time.
 *
 * Returns 0 when no heads are active.
 * NOTE(review): available_bandwidth is used as a divisor before the
 * num_heads == 0 early-out; presumably it is always non-zero here —
 * confirm against the callers.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* scaling/interlace determine how many source lines feed one
	 * destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* per-head share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* bandwidth the DMIF can sustain given its buffer size */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* line buffer fill rate is also capped by disp_clk * bpp */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can't be filled within the active period, the excess
	 * adds to the latency that must be hidden */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
685
686static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
687{
688 if (dce6_average_bandwidth(wm) <=
689 (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
690 return true;
691 else
692 return false;
693};
694
695static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
696{
697 if (dce6_average_bandwidth(wm) <=
698 (dce6_available_bandwidth(wm) / wm->num_heads))
699 return true;
700 else
701 return false;
702};
703
704static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
705{
706 u32 lb_partitions = wm->lb_size / wm->src_width;
707 u32 line_time = wm->active_time + wm->blank_time;
708 u32 latency_tolerant_lines;
709 u32 latency_hiding;
710 fixed20_12 a;
711
712 a.full = dfixed_const(1);
713 if (wm->vsc.full > a.full)
714 latency_tolerant_lines = 1;
715 else {
716 if (lb_partitions <= (wm->vtaps + 1))
717 latency_tolerant_lines = 1;
718 else
719 latency_tolerant_lines = 2;
720 }
721
722 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
723
724 if (dce6_latency_watermark(wm) <= latency_hiding)
725 return true;
726 else
727 return false;
728}
729
/*
 * dce6_program_watermarks - compute and program watermarks for one crtc
 * @rdev: radeon device
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer size allocated to this crtc (pixels)
 * @num_heads: total number of active crtcs
 *
 * Fills a dce6_wm_params from the crtc's current mode and power state,
 * derives watermarks A and B and the priority marks, then programs them
 * into the DPG arbitration/latency registers and the PRIORITY_A/B_CNT
 * registers.  Disabled crtcs get PRIORITY_OFF and zero watermarks.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		wm.dram_channels = dce6_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk
		 * XXX: currently computed with the same (current) clocks as
		 * watermark A */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * pixel rate * hsc / 16,
		 * in 20.12 fixed point */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same formula with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
836
/*
 * dce6_bandwidth_update - recompute line buffer split and watermarks
 * @rdev: radeon device
 *
 * Counts the active heads, then walks the crtcs in pairs (each pair
 * shares a line buffer), adjusting the line buffer split and
 * reprogramming the watermarks for both crtcs of each pair.
 *
 * NOTE(review): crtcs[i+1] is accessed unconditionally, so this assumes
 * rdev->num_crtc is even — confirm for all SI parts.
 */
void dce6_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode0 = NULL;
	struct drm_display_mode *mode1 = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i += 2) {
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
	}
}
859
0a96d72b
AD
860/*
861 * Core functions
862 */
/*
 * si_get_tile_pipe_to_backend_map - build the tile pipe -> render backend map
 * @rdev: radeon device
 * @num_tile_pipes: requested number of tile pipes (clamped to hw limits)
 * @num_backends_per_asic: requested number of enabled backends (clamped)
 * @backend_disable_mask_per_asic: in/out mask of disabled backends; may be
 *	rewritten if it does not yield the requested backend count
 * @num_shader_engines: requested number of shader engines (clamped)
 *
 * Returns a packed map with one 4-bit backend id per tile pipe, assigning
 * enabled backends round-robin to the (possibly swizzled) pipe order.
 */
static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
					   u32 num_tile_pipes,
					   u32 num_backends_per_asic,
					   u32 *backend_disable_mask_per_asic,
					   u32 num_shader_engines)
{
	u32 backend_map = 0;
	u32 enabled_backends_mask = 0;
	u32 enabled_backends_count = 0;
	u32 num_backends_per_se;
	u32 cur_pipe;
	u32 swizzle_pipe[SI_MAX_PIPES];
	u32 cur_backend = 0;
	u32 i;
	bool force_no_swizzle;

	/* force legal values */
	if (num_tile_pipes < 1)
		num_tile_pipes = 1;
	if (num_tile_pipes > rdev->config.si.max_tile_pipes)
		num_tile_pipes = rdev->config.si.max_tile_pipes;
	if (num_shader_engines < 1)
		num_shader_engines = 1;
	if (num_shader_engines > rdev->config.si.max_shader_engines)
		num_shader_engines = rdev->config.si.max_shader_engines;
	if (num_backends_per_asic < num_shader_engines)
		num_backends_per_asic = num_shader_engines;
	if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines))
		num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines;

	/* make sure we have the same number of backends per se */
	num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
	/* set up the number of backends per se */
	num_backends_per_se = num_backends_per_asic / num_shader_engines;
	if (num_backends_per_se > rdev->config.si.max_backends_per_se) {
		num_backends_per_se = rdev->config.si.max_backends_per_se;
		num_backends_per_asic = num_backends_per_se * num_shader_engines;
	}

	/* create enable mask and count for enabled backends */
	for (i = 0; i < SI_MAX_BACKENDS; ++i) {
		if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
			enabled_backends_mask |= (1 << i);
			++enabled_backends_count;
		}
		if (enabled_backends_count == num_backends_per_asic)
			break;
	}

	/* force the backends mask to match the current number of backends:
	 * if the caller's disable mask did not produce exactly the requested
	 * count, rebuild the mask by enabling the first
	 * num_backends_per_se backends of each active shader engine */
	if (enabled_backends_count != num_backends_per_asic) {
		u32 this_backend_enabled;
		u32 shader_engine;
		u32 backend_per_se;

		enabled_backends_mask = 0;
		enabled_backends_count = 0;
		*backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK;
		for (i = 0; i < SI_MAX_BACKENDS; ++i) {
			/* calc the current se */
			shader_engine = i / rdev->config.si.max_backends_per_se;
			/* calc the backend per se */
			backend_per_se = i % rdev->config.si.max_backends_per_se;
			/* default to not enabled */
			this_backend_enabled = 0;
			if ((shader_engine < num_shader_engines) &&
			    (backend_per_se < num_backends_per_se))
				this_backend_enabled = 1;
			if (this_backend_enabled) {
				enabled_backends_mask |= (1 << i);
				*backend_disable_mask_per_asic &= ~(1 << i);
				++enabled_backends_count;
			}
		}
	}


	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES);
	/* all current SI parts force the identity (no-swizzle) pipe order */
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
		force_no_swizzle = true;
		break;
	default:
		force_no_swizzle = false;
		break;
	}
	if (force_no_swizzle) {
		bool last_backend_enabled = false;

		/* only keep no-swizzle if two enabled backends are adjacent */
		force_no_swizzle = false;
		for (i = 0; i < SI_MAX_BACKENDS; ++i) {
			if (((enabled_backends_mask >> i) & 1) == 1) {
				if (last_backend_enabled)
					force_no_swizzle = true;
				last_backend_enabled = true;
			} else
				last_backend_enabled = false;
		}
	}

	switch (num_tile_pipes) {
	case 1:
	case 3:
	case 5:
	case 7:
		DRM_ERROR("odd number of pipes!\n");
		break;
	case 2:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 1;
		break;
	case 4:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 1;
			swizzle_pipe[3] = 3;
		}
		break;
	case 6:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 1;
			swizzle_pipe[4] = 3;
			swizzle_pipe[5] = 5;
		}
		break;
	case 8:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
			swizzle_pipe[6] = 6;
			swizzle_pipe[7] = 7;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 6;
			swizzle_pipe[4] = 1;
			swizzle_pipe[5] = 3;
			swizzle_pipe[6] = 5;
			swizzle_pipe[7] = 7;
		}
		break;
	}

	/* assign enabled backends to pipes round-robin, 4 bits per pipe */
	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
		while (((1 << cur_backend) & enabled_backends_mask) == 0)
			cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;

		backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));

		cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
	}

	return backend_map;
}
1040
1041static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev,
1042 u32 disable_mask_per_se,
1043 u32 max_disable_mask_per_se,
1044 u32 num_shader_engines)
1045{
1046 u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
1047 u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;
1048
1049 if (num_shader_engines == 1)
1050 return disable_mask_per_asic;
1051 else if (num_shader_engines == 2)
1052 return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
1053 else
1054 return 0xffffffff;
1055}
1056
/*
 * si_tiling_mode_table_init - program the 32 GB_TILE_MODE registers
 * @rdev: radeon device
 *
 * Writes one tiling descriptor (array mode, micro tile mode, pipe config,
 * tile split, bank geometry, macro tile aspect) per GB_TILE_MODEn register.
 * Tahiti and Pitcairn use the 8-pipe config (ADDR_SURF_P8_32x32_8x16),
 * Verde the 4-pipe config (ADDR_SURF_P4_8x16).  Register offsets without
 * an explicit case (18-20, 26-31) are programmed to 0.  Unknown asics get
 * no table at all (error only).
 */
static void si_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;

	/* tile split used by the "split at DRAM row boundary" modes below;
	 * mem_row_size_in_kb is derived from the MC config in si_gpu_init()
	 */
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	if ((rdev->family == CHIP_TAHITI) ||
	    (rdev->family == CHIP_PITCAIRN)) {
		/* 8-pipe parts */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:  /* non-AA compressed depth or any compressed stencil */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 1:  /* 2xAA/4xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 2:  /* 8xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 8:  /* 1D and 1D Array Surfaces */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 9:  /* Displayable maps. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 10:  /* Display 8bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 11:  /* Display 16bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 12:  /* Display 32bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 13:  /* Thin. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 14:  /* Thin 8 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 15:  /* Thin 16 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 16:  /* Thin 32 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 17:  /* Thin 64 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 21:  /* 8 bpp PRT. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 22:  /* 16 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 23:  /* 32 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 24:  /* 64 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 25:  /* 128 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
						 NUM_BANKS(ADDR_SURF_8_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			default:
				/* unused tile mode slots are cleared */
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (rdev->family == CHIP_VERDE) {
		/* 4-pipe part */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:  /* non-AA compressed depth or any compressed stencil */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 1:  /* 2xAA/4xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 2:  /* 8xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 8:  /* 1D and 1D Array Surfaces */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 9:  /* Displayable maps. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 10:  /* Display 8bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 11:  /* Display 16bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 12:  /* Display 32bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 13:  /* Thin. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 14:  /* Thin 8 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 15:  /* Thin 16 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 16:  /* Thin 32 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 17:  /* Thin 64 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			/* NOTE(review): the PRT modes (21-25) below keep the P8
			 * pipe config even though Verde is a 4-pipe part --
			 * presumably intentional so PRT layouts match the other
			 * asics; confirm against the addressing spec.
			 */
			case 21:  /* 8 bpp PRT. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 22:  /* 16 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 23:  /* 32 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 24:  /* 64 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 25:  /* 128 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
						 NUM_BANKS(ADDR_SURF_8_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			default:
				/* unused tile mode slots are cleared */
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
}
1557
/*
 * si_gpu_init - one-time graphics engine setup for SI asics
 * @rdev: radeon device
 *
 * Fills in the per-family hardware limits in rdev->config.si, derives the
 * tile pipe / shader engine / backend configuration from the MC and GC
 * strap registers, programs GB_ADDR_CONFIG (and mirrors it to the DMIF and
 * HDP address config registers), installs the tiling mode table and sets
 * the 3D engine's default register state.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 cc_rb_backend_disable = 0;
	u32 cc_gc_shader_array_config;
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 gb_backend_map;
	u32 cgts_tcc_disable;
	u32 sx_debug_1;
	u32 gc_user_shader_array_config;
	u32 gc_user_rb_backend_disable;
	u32 cgts_user_tcc_disable;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family hardware limits and scan converter FIFO sizes */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_pipes_per_simd = 4;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_simds_per_se = 8;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_pipes_per_simd = 4;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_simds_per_se = 5;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_pipes_per_simd = 4;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_simds_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		break;
	}

	/* Initialize HDP: 32 slots of five registers each, stride 0x18 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	/* snapshot the hardware's backend/shader-array/TCC configuration */
	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
	cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	/* enable only the texture channel caches this asic actually has */
	cgts_tcc_disable = 0xffff0000;
	for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++)
		cgts_tcc_disable &= ~(1 << (16 + i));
	gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
	gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);

	/* derive the enabled backend/TCC counts from the user disable masks */
	rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines;
	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp);
	tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.si.backend_disable_mask_per_asic =
		si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK,
					     rdev->config.si.num_shader_engines);
	rdev->config.si.backend_map =
		si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
						rdev->config.si.num_backends_per_se *
						rdev->config.si.num_shader_engines,
						&rdev->config.si.backend_disable_mask_per_asic,
						rdev->config.si.num_shader_engines);
	tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
	rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp);
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* memory row size in kb = 4 bytes/col * number of columns / 1024 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* encode the config into the GB_ADDR_CONFIG bitfields */
	gb_addr_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		gb_addr_config |= NUM_PIPES(0);
		break;
	case 2:
		gb_addr_config |= NUM_PIPES(1);
		break;
	case 4:
		gb_addr_config |= NUM_PIPES(2);
		break;
	case 8:
	default:
		gb_addr_config |= NUM_PIPES(3);
		break;
	}

	tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1;
	gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
	gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1);
	tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1;
	gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
	switch (rdev->config.si.num_gpus) {
	case 1:
	default:
		gb_addr_config |= NUM_GPUS(0);
		break;
	case 2:
		gb_addr_config |= NUM_GPUS(1);
		break;
	case 4:
		gb_addr_config |= NUM_GPUS(2);
		break;
	}
	switch (rdev->config.si.multi_gpu_tile_size) {
	case 16:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
		break;
	case 32:
	default:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
		break;
	case 64:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
		break;
	case 128:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
		break;
	}
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* decode the fields back so the software copy matches exactly what
	 * the (possibly clamped/rounded) encoding represents
	 */
	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
	rdev->config.si.num_tile_pipes = (1 << tmp);
	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
	rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256;
	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
	rdev->config.si.num_shader_engines = tmp + 1;
	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
	rdev->config.si.num_gpus = tmp + 1;
	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
	rdev->config.si.multi_gpu_tile_size = 1 << tmp;
	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
	rdev->config.si.mem_row_size_in_kb = 1 << tmp;

	/* recompute the backend map with the values decoded above; this
	 * overwrites the earlier rdev->config.si.backend_map assignment
	 */
	gb_backend_map =
		si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
						rdev->config.si.num_backends_per_se *
						rdev->config.si.num_shader_engines,
						&rdev->config.si.backend_disable_mask_per_asic,
						rdev->config.si.num_shader_engines);

	/* setup tiling info dword. gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0 num_pipes
	 * bits 7:4 num_banks
	 * bits 11:8 group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	rdev->config.si.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	rdev->config.si.backend_map = gb_backend_map;
	/* the same address config is mirrored to the display and HDP blocks */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);

	/* primary versions */
	WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);

	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);

	/* user versions - programmed with the same values as the primaries */
	WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);

	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);

	si_tiling_mode_table_init(rdev);

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification: rewrites the current value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* clear the CB performance counter selects */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the new config settle before further programming */
	udelay(50);
}
c476dde2
AD
1863
/**
 * si_gpu_is_lockup - check whether the GFX engine appears hung
 * @rdev: radeon device
 * @ring: ring to test
 *
 * Reads the GRBM status; if the GUI is idle the lockup tracker is
 * refreshed and false is returned.  Otherwise two NOP packets are
 * submitted to force CP activity and the generic r100 CP-progress
 * heuristic decides whether the ring is actually locked up.
 */
bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 srbm_status;
	u32 grbm_status, grbm_status2;
	u32 grbm_status_se0, grbm_status_se1;
	struct r100_gpu_lockup *lockup = &rdev->config.si.lockup;
	int r;

	/* NOTE(review): only grbm_status is tested below; the SRBM/SE
	 * status reads look like debug/latching reads — confirm they are
	 * intentionally unused. */
	srbm_status = RREG32(SRBM_STATUS);
	grbm_status = RREG32(GRBM_STATUS);
	grbm_status2 = RREG32(GRBM_STATUS2);
	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
	if (!(grbm_status & GUI_ACTIVE)) {
		/* engine idle: record current ring position as progress */
		r100_gpu_lockup_update(lockup, ring);
		return false;
	}
	/* force CP activities */
	r = radeon_ring_lock(rdev, ring, 2);
	if (!r) {
		/* PACKET2 NOP */
		radeon_ring_write(ring, 0x80000000);
		radeon_ring_write(ring, 0x80000000);
		radeon_ring_unlock_commit(rdev, ring);
	}
	/* XXX deal with CP0,1,2 */
	ring->rptr = RREG32(ring->rptr_reg);
	return r100_gpu_cp_is_lockup(rdev, lockup, ring);
}
1893
/**
 * si_gpu_soft_reset - soft reset the GFX block
 * @rdev: radeon device
 *
 * Dumps the engine status registers, stops the MC, halts the CP and
 * pulses a soft reset through GRBM_SOFT_RESET for all gfx sub-blocks,
 * then restores the MC.  Returns 0 (including when the engine was
 * already idle and no reset was needed).
 *
 * The ordering here matters: MC must be quiesced before the reset
 * pulse, and resumed only after the blocks have settled.
 */
static int si_gpu_soft_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 grbm_reset = 0;

	/* nothing to do if the graphics engine is already idle */
	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
		return 0;

	dev_info(rdev->dev, "GPU softreset \n");
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* reset all the gfx blocks */
	grbm_reset = (SOFT_RESET_CP |
		      SOFT_RESET_CB |
		      SOFT_RESET_DB |
		      SOFT_RESET_GDS |
		      SOFT_RESET_PA |
		      SOFT_RESET_SC |
		      SOFT_RESET_SPI |
		      SOFT_RESET_SX |
		      SOFT_RESET_TC |
		      SOFT_RESET_TA |
		      SOFT_RESET_VGT |
		      SOFT_RESET_IA);

	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
	WREG32(GRBM_SOFT_RESET, grbm_reset);
	/* read back to post the write, then hold reset briefly */
	(void)RREG32(GRBM_SOFT_RESET);
	udelay(50);
	WREG32(GRBM_SOFT_RESET, 0);
	(void)RREG32(GRBM_SOFT_RESET);
	/* Wait a little for things to settle down */
	udelay(50);
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	evergreen_mc_resume(rdev, &save);
	return 0;
}
1955
/**
 * si_asic_reset - asic reset callback for SI
 * @rdev: radeon device
 *
 * Thin wrapper: SI only implements a gfx soft reset.
 */
int si_asic_reset(struct radeon_device *rdev)
{
	return si_gpu_soft_reset(rdev);
}
1960
d2800ee5
AD
1961/* MC */
/**
 * si_mc_program - program the memory controller apertures
 * @rdev: radeon device
 *
 * Stops the MC/display, points the system and FB apertures at the
 * current VRAM layout, disables the AGP aperture and VGA access, then
 * resumes the MC.  Must run with the MC idle, hence the stop/wait
 * bracketing.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* 32 register instances, 5 dwords each, stride 0x18 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	/* system aperture covers VRAM; addresses are in 4K units */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location packs top (bits 31:16) and base (bits 15:0) in 16M units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
2009
2010/* SI MC address space is 40 bits */
2011static void si_vram_location(struct radeon_device *rdev,
2012 struct radeon_mc *mc, u64 base)
2013{
2014 mc->vram_start = base;
2015 if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
2016 dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
2017 mc->real_vram_size = mc->aper_size;
2018 mc->mc_vram_size = mc->aper_size;
2019 }
2020 mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
2021 dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
2022 mc->mc_vram_size >> 20, mc->vram_start,
2023 mc->vram_end, mc->real_vram_size >> 20);
2024}
2025
2026static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
2027{
2028 u64 size_af, size_bf;
2029
2030 size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
2031 size_bf = mc->vram_start & ~mc->gtt_base_align;
2032 if (size_bf > size_af) {
2033 if (mc->gtt_size > size_bf) {
2034 dev_warn(rdev->dev, "limiting GTT\n");
2035 mc->gtt_size = size_bf;
2036 }
2037 mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
2038 } else {
2039 if (mc->gtt_size > size_af) {
2040 dev_warn(rdev->dev, "limiting GTT\n");
2041 mc->gtt_size = size_af;
2042 }
2043 mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
2044 }
2045 mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
2046 dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
2047 mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
2048}
2049
2050static void si_vram_gtt_location(struct radeon_device *rdev,
2051 struct radeon_mc *mc)
2052{
2053 if (mc->mc_vram_size > 0xFFC0000000ULL) {
2054 /* leave room for at least 1024M GTT */
2055 dev_warn(rdev->dev, "limiting VRAM\n");
2056 mc->real_vram_size = 0xFFC0000000ULL;
2057 mc->mc_vram_size = 0xFFC0000000ULL;
2058 }
2059 si_vram_location(rdev, &rdev->mc, 0);
2060 rdev->mc.gtt_base_align = 0;
2061 si_gtt_location(rdev, mc);
2062}
2063
/**
 * si_mc_init - gather VRAM information and lay out the MC address space
 * @rdev: radeon device
 *
 * Derives the VRAM bus width from the channel size and channel count
 * registers, reads the VRAM size, and computes the VRAM/GTT placement.
 * Always returns 0.
 */
static int si_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_OVERRIDE) {
		chansize = 16;
	} else if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	/* decode NOOFCHAN; SI supports asymmetric configs (3/6/10/12 channels) */
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	/* CPU can only see VRAM through the PCI BAR */
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
2123
2124/*
2125 * GART
2126 */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLB for VM context 0
 * @rdev: radeon device
 *
 * Flushes the HDP cache first so pending writes hit memory before the
 * TLB invalidate.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
2135
/**
 * si_pcie_gart_enable - set up and enable the GART
 * @rdev: radeon device
 *
 * Pins the GART page table in VRAM, configures the L1 TLB and VM L2
 * cache, programs VM context 0 to cover the GTT aperture, points
 * contexts 1-15 at the same page table, and flushes the TLB.
 *
 * Returns 0 on success, negative error code if the page table BO is
 * missing or cannot be pinned.
 */
int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	/* context 0 maps the GTT range; addresses in 4K pages */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faulting accesses are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers — presumably VM-related
	 * scratch; confirm against the register spec. */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 1G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 0);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
2208
/**
 * si_pcie_gart_disable - disable the GART
 * @rdev: radeon device
 *
 * Disables all VM contexts, turns off the L1 TLB and the L2 cache, and
 * unpins the page table from VRAM.
 */
void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	/* ENABLE_L1_TLB intentionally not set: TLB off */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	/* ENABLE_L2_CACHE intentionally not set: cache off */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
2227
/**
 * si_pcie_gart_fini - tear down the GART
 * @rdev: radeon device
 *
 * Disable first so the hardware no longer references the table, then
 * free the table BO and the GART bookkeeping.
 */
void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
2234
498dd8b3
AD
2235/* vm parser */
2236static bool si_vm_reg_valid(u32 reg)
2237{
2238 /* context regs are fine */
2239 if (reg >= 0x28000)
2240 return true;
2241
2242 /* check config regs */
2243 switch (reg) {
2244 case GRBM_GFX_INDEX:
2245 case VGT_VTX_VECT_EJECT_REG:
2246 case VGT_CACHE_INVALIDATION:
2247 case VGT_ESGS_RING_SIZE:
2248 case VGT_GSVS_RING_SIZE:
2249 case VGT_GS_VERTEX_REUSE:
2250 case VGT_PRIMITIVE_TYPE:
2251 case VGT_INDEX_TYPE:
2252 case VGT_NUM_INDICES:
2253 case VGT_NUM_INSTANCES:
2254 case VGT_TF_RING_SIZE:
2255 case VGT_HS_OFFCHIP_PARAM:
2256 case VGT_TF_MEMORY_BASE:
2257 case PA_CL_ENHANCE:
2258 case PA_SU_LINE_STIPPLE_VALUE:
2259 case PA_SC_LINE_STIPPLE_STATE:
2260 case PA_SC_ENHANCE:
2261 case SQC_CACHES:
2262 case SPI_STATIC_THREAD_MGMT_1:
2263 case SPI_STATIC_THREAD_MGMT_2:
2264 case SPI_STATIC_THREAD_MGMT_3:
2265 case SPI_PS_MAX_WAVE_ID:
2266 case SPI_CONFIG_CNTL:
2267 case SPI_CONFIG_CNTL_1:
2268 case TA_CNTL_AUX:
2269 return true;
2270 default:
2271 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
2272 return false;
2273 }
2274}
2275
2276static int si_vm_packet3_ce_check(struct radeon_device *rdev,
2277 u32 *ib, struct radeon_cs_packet *pkt)
2278{
2279 switch (pkt->opcode) {
2280 case PACKET3_NOP:
2281 case PACKET3_SET_BASE:
2282 case PACKET3_SET_CE_DE_COUNTERS:
2283 case PACKET3_LOAD_CONST_RAM:
2284 case PACKET3_WRITE_CONST_RAM:
2285 case PACKET3_WRITE_CONST_RAM_OFFSET:
2286 case PACKET3_DUMP_CONST_RAM:
2287 case PACKET3_INCREMENT_CE_COUNTER:
2288 case PACKET3_WAIT_ON_DE_COUNTER:
2289 case PACKET3_CE_WRITE:
2290 break;
2291 default:
2292 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
2293 return -EINVAL;
2294 }
2295 return 0;
2296}
2297
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the gfx ring
 * @rdev: radeon device
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header
 *
 * Most opcodes are allowed outright; the data-movement opcodes that can
 * target registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG) have their destination registers checked against
 * si_vm_reg_valid().  Returns 0 if the packet is acceptable, -EINVAL
 * otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		/* always allowed */
		break;
	case PACKET3_COPY_DATA:
		/* dest-sel field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dest-sel field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-reg-write mode: single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential mode: count - 2 registers */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set: destination is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-sel bit set: destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range must stay inside the config reg window, and each
		 * register must be individually whitelisted */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
2409
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring
 * @rdev: radeon device
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header
 *
 * Same scheme as the gfx check but with the draw-related opcodes
 * removed from the allowed set.  Register-writing opcodes are checked
 * against si_vm_reg_valid().  Returns 0 or -EINVAL.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		/* always allowed */
		break;
	case PACKET3_COPY_DATA:
		/* dest-sel field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dest-sel field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-reg-write mode: single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential mode: count - 2 registers */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set: destination is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-sel bit set: destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
2491
/**
 * si_ib_parse - validate an indirect buffer for VM submission
 * @rdev: radeon device
 * @ib: indirect buffer to check
 *
 * Walks the IB packet by packet.  Type-0 packets are forbidden, type-2
 * packets are skipped, and type-3 packets are dispatched to the CE,
 * gfx, or compute checker depending on the target ring.  Stops at the
 * first invalid packet.  Returns 0 if the whole IB is acceptable,
 * -EINVAL otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case PACKET_TYPE0:
			/* raw register writes are not allowed from a VM IB */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case PACKET_TYPE2:
			/* type-2 is a one-dword filler NOP */
			idx += 1;
			break;
		case PACKET_TYPE3:
			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				/* const IBs run on the constant engine */
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->fence->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword + count+1 body dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
2543
d2800ee5
AD
2544/*
2545 * vm
2546 */
/**
 * si_vm_init - initialize the VM manager parameters for SI
 * @rdev: radeon device
 *
 * SI exposes 16 VM contexts and no extra VRAM page offset.
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
2556
/**
 * si_vm_fini - tear down the VM manager
 * @rdev: radeon device
 *
 * Nothing to do on SI; kept for the asic callback interface.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
2560
/**
 * si_vm_bind - bind a VM's page table to a hardware VM context
 * @rdev: radeon device
 * @vm: virtual memory instance
 * @id: hardware VM context id (0-15)
 *
 * Contexts 0-7 and 8-15 live in two separate register banks.  Flushes
 * the HDP cache and invalidates the context's TLB afterwards.
 * Always returns 0.
 */
int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
{
	if (id < 8)
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
	else
		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
		       vm->pt_gpu_addr >> 12);
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1 << id);
	return 0;
}
2574
/**
 * si_vm_unbind - unbind a VM from its hardware VM context
 * @rdev: radeon device
 * @vm: virtual memory instance (vm->id selects the context)
 *
 * Clears the context's page table base (two register banks, see
 * si_vm_bind()), then flushes the HDP cache and the context's TLB.
 */
void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
{
	if (vm->id < 8)
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0);
	else
		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2), 0);
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
}
2586
/**
 * si_vm_tlb_flush - flush the TLB for one VM context
 * @rdev: radeon device
 * @vm: virtual memory instance
 *
 * No-op for an unbound VM (id == -1 is presumably the "not bound"
 * sentinel set by the VM manager — confirm against radeon_vm code).
 */
void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
{
	if (vm->id == -1)
		return;

	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
}
2597