drm/radeon/kms: add gpu init support for SI
drivers/gpu/drm/radeon/si.c
1/*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include "drmP.h"
25#include "radeon.h"
26#include "radeon_asic.h"
27#include "radeon_drm.h"
28#include "sid.h"
29#include "atom.h"
30
31extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
32
33/* get temperature in millidegrees */
34int si_get_temp(struct radeon_device *rdev)
35{
36 u32 temp;
37 int actual_temp = 0;
38
39 temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
40 CTF_TEMP_SHIFT;
41
42 if (temp & 0x200)
43 actual_temp = 255;
44 else
45 actual_temp = temp & 0x1ff;
46
47 actual_temp = (actual_temp * 1000);
48
49 return actual_temp;
50}
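/* Example decode for si_get_temp() above (illustrative values): a CTF_TEMP
 * field of 0x03c (60) yields 60 * 1000 = 60000 millidegrees C, while any
 * reading with bit 9 (0x200) set is treated as out of range and clamped
 * to 255000.
 */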
51
52/* watermark setup */
53static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
54 struct radeon_crtc *radeon_crtc,
55 struct drm_display_mode *mode,
56 struct drm_display_mode *other_mode)
57{
58 u32 tmp;
59 /*
60 * Line Buffer Setup
61 * There are 3 line buffers, each one shared by 2 display controllers.
62 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
62 * the display controllers. The partitioning is done via one of four
64 * preset allocations specified in bits 21:20:
65 * 0 - half lb
66 * 2 - whole lb, other crtc must be disabled
67 */
68 /* this can get tricky if we have two large displays on a paired group
69 * of crtcs. Ideally for multiple large displays we'd assign them to
70 * non-linked crtcs for maximum line buffer allocation.
71 */
72 if (radeon_crtc->base.enabled && mode) {
73 if (other_mode)
74 tmp = 0; /* 1/2 */
75 else
76 tmp = 2; /* whole */
77 } else
78 tmp = 0;
79
80 WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
81 DC_LB_MEMORY_CONFIG(tmp));
82
83 if (radeon_crtc->base.enabled && mode) {
84 switch (tmp) {
85 case 0:
86 default:
87 return 4096 * 2;
88 case 2:
89 return 8192 * 2;
90 }
91 }
92
93 /* controller not enabled, so no lb used */
94 return 0;
95}
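/* Worked example for dce6_line_buffer_adjust() above (illustrative): if only
 * one crtc of the pair drives a mode, tmp = 2 (whole lb) and the function
 * returns 8192 * 2 = 16384; if both crtcs are active, each gets the half-lb
 * allocation of 4096 * 2 = 8192.
 */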
96
97static u32 dce6_get_number_of_dram_channels(struct radeon_device *rdev)
98{
99 u32 tmp = RREG32(MC_SHARED_CHMAP);
100
101 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
102 case 0:
103 default:
104 return 1;
105 case 1:
106 return 2;
107 case 2:
108 return 4;
109 case 3:
110 return 8;
111 case 4:
112 return 3;
113 case 5:
114 return 6;
115 case 6:
116 return 10;
117 case 7:
118 return 12;
119 case 8:
120 return 16;
121 }
122}
123
124struct dce6_wm_params {
125 u32 dram_channels; /* number of dram channels */
126 u32 yclk; /* bandwidth per dram data pin in kHz */
127 u32 sclk; /* engine clock in kHz */
128 u32 disp_clk; /* display clock in kHz */
129 u32 src_width; /* viewport width */
130 u32 active_time; /* active display time in ns */
131 u32 blank_time; /* blank time in ns */
132 bool interlaced; /* mode is interlaced */
133 fixed20_12 vsc; /* vertical scale ratio */
134 u32 num_heads; /* number of active crtcs */
135 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
136 u32 lb_size; /* line buffer allocated to pipe */
137 u32 vtaps; /* vertical scaler taps */
138};
139
140static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
141{
142 /* Calculate raw DRAM Bandwidth */
143 fixed20_12 dram_efficiency; /* 0.7 */
144 fixed20_12 yclk, dram_channels, bandwidth;
145 fixed20_12 a;
146
147 a.full = dfixed_const(1000);
148 yclk.full = dfixed_const(wm->yclk);
149 yclk.full = dfixed_div(yclk, a);
150 dram_channels.full = dfixed_const(wm->dram_channels * 4);
151 a.full = dfixed_const(10);
152 dram_efficiency.full = dfixed_const(7);
153 dram_efficiency.full = dfixed_div(dram_efficiency, a);
154 bandwidth.full = dfixed_mul(dram_channels, yclk);
155 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
156
157 return dfixed_trunc(bandwidth);
158}
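/* Worked example for dce6_dram_bandwidth() above (illustrative, assuming an
 * 8-channel board with wm->yclk = 1250000 kHz): 1250 MHz * (8 channels * 4)
 * bytes * 0.7 efficiency ~= 28000 MB/s of raw DRAM bandwidth.
 */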
159
160static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
161{
162 /* Calculate DRAM Bandwidth and the part allocated to display. */
163 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
164 fixed20_12 yclk, dram_channels, bandwidth;
165 fixed20_12 a;
166
167 a.full = dfixed_const(1000);
168 yclk.full = dfixed_const(wm->yclk);
169 yclk.full = dfixed_div(yclk, a);
170 dram_channels.full = dfixed_const(wm->dram_channels * 4);
171 a.full = dfixed_const(10);
172 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
173 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
174 bandwidth.full = dfixed_mul(dram_channels, yclk);
175 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
176
177 return dfixed_trunc(bandwidth);
178}
179
180static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
181{
182 /* Calculate the display Data return Bandwidth */
183 fixed20_12 return_efficiency; /* 0.8 */
184 fixed20_12 sclk, bandwidth;
185 fixed20_12 a;
186
187 a.full = dfixed_const(1000);
188 sclk.full = dfixed_const(wm->sclk);
189 sclk.full = dfixed_div(sclk, a);
190 a.full = dfixed_const(10);
191 return_efficiency.full = dfixed_const(8);
192 return_efficiency.full = dfixed_div(return_efficiency, a);
193 a.full = dfixed_const(32);
194 bandwidth.full = dfixed_mul(a, sclk);
195 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
196
197 return dfixed_trunc(bandwidth);
198}
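/* Worked example for dce6_data_return_bandwidth() above (illustrative,
 * assuming wm->sclk = 800000 kHz): 800 MHz * 32 bytes * 0.8 efficiency
 * = 20480 MB/s.
 */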
199
200static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
201{
202 return 32;
203}
204
205static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
206{
207 /* Calculate the DMIF Request Bandwidth */
208 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
209 fixed20_12 disp_clk, sclk, bandwidth;
210 fixed20_12 a, b1, b2;
211 u32 min_bandwidth;
212
213 a.full = dfixed_const(1000);
214 disp_clk.full = dfixed_const(wm->disp_clk);
215 disp_clk.full = dfixed_div(disp_clk, a);
216 a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
217 b1.full = dfixed_mul(a, disp_clk);
218
219 a.full = dfixed_const(1000);
220 sclk.full = dfixed_const(wm->sclk);
221 sclk.full = dfixed_div(sclk, a);
222 a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
223 b2.full = dfixed_mul(a, sclk);
224
225 a.full = dfixed_const(10);
226 disp_clk_request_efficiency.full = dfixed_const(8);
227 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
228
229 min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
230
231 a.full = dfixed_const(min_bandwidth);
232 bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
233
234 return dfixed_trunc(bandwidth);
235}
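/* Worked example for dce6_dmif_request_bandwidth() above (illustrative,
 * assuming wm->disp_clk = 148500 kHz and wm->sclk = 800000 kHz):
 * b1 = 16 * 148.5 = 2376, b2 = 32 * 800 = 25600, so the result is
 * min(2376, 25600) * 0.8 ~= 1900.
 */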
236
237static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
238{
239 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
240 u32 dram_bandwidth = dce6_dram_bandwidth(wm);
241 u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
242 u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
243
244 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
245}
246
247static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
248{
249 /* Calculate the display mode Average Bandwidth
250 * DisplayMode should contain the source and destination dimensions,
251 * timing, etc.
252 */
253 fixed20_12 bpp;
254 fixed20_12 line_time;
255 fixed20_12 src_width;
256 fixed20_12 bandwidth;
257 fixed20_12 a;
258
259 a.full = dfixed_const(1000);
260 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
261 line_time.full = dfixed_div(line_time, a);
262 bpp.full = dfixed_const(wm->bytes_per_pixel);
263 src_width.full = dfixed_const(wm->src_width);
264 bandwidth.full = dfixed_mul(src_width, bpp);
265 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
266 bandwidth.full = dfixed_div(bandwidth, line_time);
267
268 return dfixed_trunc(bandwidth);
269}
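/* Worked example for dce6_average_bandwidth() above (illustrative, assuming
 * src_width = 1920, 4 bytes per pixel, vsc = 1.0 and a 13200 ns line time):
 * 1920 * 4 = 7680 bytes per source line / 13.2 us ~= 581 MB/s.
 */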
270
271static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
272{
273 /* First calculate the latency in ns */
274 u32 mc_latency = 2000; /* 2000 ns. */
275 u32 available_bandwidth = dce6_available_bandwidth(wm);
276 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
277 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
278 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
279 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
280 (wm->num_heads * cursor_line_pair_return_time);
281 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
282 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
283 u32 tmp, dmif_size = 12288;
284 fixed20_12 a, b, c;
285
286 if (wm->num_heads == 0)
287 return 0;
288
289 a.full = dfixed_const(2);
290 b.full = dfixed_const(1);
291 if ((wm->vsc.full > a.full) ||
292 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
293 (wm->vtaps >= 5) ||
294 ((wm->vsc.full >= a.full) && wm->interlaced))
295 max_src_lines_per_dst_line = 4;
296 else
297 max_src_lines_per_dst_line = 2;
298
299 a.full = dfixed_const(available_bandwidth);
300 b.full = dfixed_const(wm->num_heads);
301 a.full = dfixed_div(a, b);
302
303 b.full = dfixed_const(mc_latency + 512);
304 c.full = dfixed_const(wm->disp_clk);
305 b.full = dfixed_div(b, c);
306
307 c.full = dfixed_const(dmif_size);
308 b.full = dfixed_div(c, b);
309
310 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
311
312 b.full = dfixed_const(1000);
313 c.full = dfixed_const(wm->disp_clk);
314 b.full = dfixed_div(c, b);
315 c.full = dfixed_const(wm->bytes_per_pixel);
316 b.full = dfixed_mul(b, c);
317
318 lb_fill_bw = min(tmp, dfixed_trunc(b));
319
320 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
321 b.full = dfixed_const(1000);
322 c.full = dfixed_const(lb_fill_bw);
323 b.full = dfixed_div(c, b);
324 a.full = dfixed_div(a, b);
325 line_fill_time = dfixed_trunc(a);
326
327 if (line_fill_time < wm->active_time)
328 return latency;
329 else
330 return latency + (line_fill_time - wm->active_time);
331
332}
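/* Summary of dce6_latency_watermark() above: the watermark (in ns) is the
 * memory controller latency (2000 ns) plus the dc pipe latency
 * (40000000 / disp_clk) plus the time other heads may spend returning chunk
 * and cursor data; if refilling the line buffer takes longer than the active
 * display time, the shortfall is added on top.
 */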
333
334static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
335{
336 if (dce6_average_bandwidth(wm) <=
337 (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
338 return true;
339 else
340 return false;
341}
342
343static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
344{
345 if (dce6_average_bandwidth(wm) <=
346 (dce6_available_bandwidth(wm) / wm->num_heads))
347 return true;
348 else
349 return false;
350}
351
352static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
353{
354 u32 lb_partitions = wm->lb_size / wm->src_width;
355 u32 line_time = wm->active_time + wm->blank_time;
356 u32 latency_tolerant_lines;
357 u32 latency_hiding;
358 fixed20_12 a;
359
360 a.full = dfixed_const(1);
361 if (wm->vsc.full > a.full)
362 latency_tolerant_lines = 1;
363 else {
364 if (lb_partitions <= (wm->vtaps + 1))
365 latency_tolerant_lines = 1;
366 else
367 latency_tolerant_lines = 2;
368 }
369
370 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
371
372 if (dce6_latency_watermark(wm) <= latency_hiding)
373 return true;
374 else
375 return false;
376}
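/* Worked example for dce6_check_latency_hiding() above (illustrative,
 * assuming lb_size = 16384, src_width = 1920, vtaps = 1, vsc <= 1,
 * line_time = 13200 ns and blank_time = 1680 ns): lb_partitions = 8, so
 * latency_tolerant_lines = 2 and latency_hiding = 2 * 13200 + 1680 =
 * 28080 ns, which the latency watermark must not exceed.
 */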
377
378static void dce6_program_watermarks(struct radeon_device *rdev,
379 struct radeon_crtc *radeon_crtc,
380 u32 lb_size, u32 num_heads)
381{
382 struct drm_display_mode *mode = &radeon_crtc->base.mode;
383 struct dce6_wm_params wm;
384 u32 pixel_period;
385 u32 line_time = 0;
386 u32 latency_watermark_a = 0, latency_watermark_b = 0;
387 u32 priority_a_mark = 0, priority_b_mark = 0;
388 u32 priority_a_cnt = PRIORITY_OFF;
389 u32 priority_b_cnt = PRIORITY_OFF;
390 u32 tmp, arb_control3;
391 fixed20_12 a, b, c;
392
393 if (radeon_crtc->base.enabled && num_heads && mode) {
394 pixel_period = 1000000 / (u32)mode->clock;
395 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
396 priority_a_cnt = 0;
397 priority_b_cnt = 0;
398
399 wm.yclk = rdev->pm.current_mclk * 10;
400 wm.sclk = rdev->pm.current_sclk * 10;
401 wm.disp_clk = mode->clock;
402 wm.src_width = mode->crtc_hdisplay;
403 wm.active_time = mode->crtc_hdisplay * pixel_period;
404 wm.blank_time = line_time - wm.active_time;
405 wm.interlaced = false;
406 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
407 wm.interlaced = true;
408 wm.vsc = radeon_crtc->vsc;
409 wm.vtaps = 1;
410 if (radeon_crtc->rmx_type != RMX_OFF)
411 wm.vtaps = 2;
412 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
413 wm.lb_size = lb_size;
414 wm.dram_channels = dce6_get_number_of_dram_channels(rdev);
415 wm.num_heads = num_heads;
416
417 /* set for high clocks */
418 latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
419 /* set for low clocks */
420 /* wm.yclk = low clk; wm.sclk = low clk */
421 latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
422
423 /* possibly force display priority to high */
424 /* should really do this at mode validation time... */
425 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
426 !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
427 !dce6_check_latency_hiding(&wm) ||
428 (rdev->disp_priority == 2)) {
429 DRM_DEBUG_KMS("force priority to high\n");
430 priority_a_cnt |= PRIORITY_ALWAYS_ON;
431 priority_b_cnt |= PRIORITY_ALWAYS_ON;
432 }
433
434 a.full = dfixed_const(1000);
435 b.full = dfixed_const(mode->clock);
436 b.full = dfixed_div(b, a);
437 c.full = dfixed_const(latency_watermark_a);
438 c.full = dfixed_mul(c, b);
439 c.full = dfixed_mul(c, radeon_crtc->hsc);
440 c.full = dfixed_div(c, a);
441 a.full = dfixed_const(16);
442 c.full = dfixed_div(c, a);
443 priority_a_mark = dfixed_trunc(c);
444 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
445
446 a.full = dfixed_const(1000);
447 b.full = dfixed_const(mode->clock);
448 b.full = dfixed_div(b, a);
449 c.full = dfixed_const(latency_watermark_b);
450 c.full = dfixed_mul(c, b);
451 c.full = dfixed_mul(c, radeon_crtc->hsc);
452 c.full = dfixed_div(c, a);
453 a.full = dfixed_const(16);
454 c.full = dfixed_div(c, a);
455 priority_b_mark = dfixed_trunc(c);
456 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
457 }
458
459 /* select wm A */
460 arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
461 tmp = arb_control3;
462 tmp &= ~LATENCY_WATERMARK_MASK(3);
463 tmp |= LATENCY_WATERMARK_MASK(1);
464 WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
465 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
466 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
467 LATENCY_HIGH_WATERMARK(line_time)));
468 /* select wm B */
469 tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
470 tmp &= ~LATENCY_WATERMARK_MASK(3);
471 tmp |= LATENCY_WATERMARK_MASK(2);
472 WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
473 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
474 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
475 LATENCY_HIGH_WATERMARK(line_time)));
476 /* restore original selection */
477 WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
478
479 /* write the priority marks */
480 WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
481 WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
482
483}
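/* Worked example for the priority mark math above (illustrative, assuming a
 * latency watermark of 8000 ns, mode->clock = 148500 kHz and hsc = 1.0):
 * 8000 * 148.5 / 1000 = 1188 pixels are scanned out during the latency
 * window, and 1188 / 16 = 74 is the mark programmed into PRIORITY_x_CNT.
 */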
484
485void dce6_bandwidth_update(struct radeon_device *rdev)
486{
487 struct drm_display_mode *mode0 = NULL;
488 struct drm_display_mode *mode1 = NULL;
489 u32 num_heads = 0, lb_size;
490 int i;
491
492 radeon_update_display_priority(rdev);
493
494 for (i = 0; i < rdev->num_crtc; i++) {
495 if (rdev->mode_info.crtcs[i]->base.enabled)
496 num_heads++;
497 }
498 for (i = 0; i < rdev->num_crtc; i += 2) {
499 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
500 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
501 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
502 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
503 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
504 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
505 }
506}
507
508/*
509 * Core functions
510 */
511static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
512 u32 num_tile_pipes,
513 u32 num_backends_per_asic,
514 u32 *backend_disable_mask_per_asic,
515 u32 num_shader_engines)
516{
517 u32 backend_map = 0;
518 u32 enabled_backends_mask = 0;
519 u32 enabled_backends_count = 0;
520 u32 num_backends_per_se;
521 u32 cur_pipe;
522 u32 swizzle_pipe[SI_MAX_PIPES];
523 u32 cur_backend = 0;
524 u32 i;
525 bool force_no_swizzle;
526
527 /* force legal values */
528 if (num_tile_pipes < 1)
529 num_tile_pipes = 1;
530 if (num_tile_pipes > rdev->config.si.max_tile_pipes)
531 num_tile_pipes = rdev->config.si.max_tile_pipes;
532 if (num_shader_engines < 1)
533 num_shader_engines = 1;
534 if (num_shader_engines > rdev->config.si.max_shader_engines)
535 num_shader_engines = rdev->config.si.max_shader_engines;
536 if (num_backends_per_asic < num_shader_engines)
537 num_backends_per_asic = num_shader_engines;
538 if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines))
539 num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines;
540
541 /* make sure we have the same number of backends per se */
542 num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
543 /* set up the number of backends per se */
544 num_backends_per_se = num_backends_per_asic / num_shader_engines;
545 if (num_backends_per_se > rdev->config.si.max_backends_per_se) {
546 num_backends_per_se = rdev->config.si.max_backends_per_se;
547 num_backends_per_asic = num_backends_per_se * num_shader_engines;
548 }
549
550 /* create enable mask and count for enabled backends */
551 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
552 if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
553 enabled_backends_mask |= (1 << i);
554 ++enabled_backends_count;
555 }
556 if (enabled_backends_count == num_backends_per_asic)
557 break;
558 }
559
560 /* force the backends mask to match the current number of backends */
561 if (enabled_backends_count != num_backends_per_asic) {
562 u32 this_backend_enabled;
563 u32 shader_engine;
564 u32 backend_per_se;
565
566 enabled_backends_mask = 0;
567 enabled_backends_count = 0;
568 *backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK;
569 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
570 /* calc the current se */
571 shader_engine = i / rdev->config.si.max_backends_per_se;
572 /* calc the backend per se */
573 backend_per_se = i % rdev->config.si.max_backends_per_se;
574 /* default to not enabled */
575 this_backend_enabled = 0;
576 if ((shader_engine < num_shader_engines) &&
577 (backend_per_se < num_backends_per_se))
578 this_backend_enabled = 1;
579 if (this_backend_enabled) {
580 enabled_backends_mask |= (1 << i);
581 *backend_disable_mask_per_asic &= ~(1 << i);
582 ++enabled_backends_count;
583 }
584 }
585 }
586
587
588 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES);
589 switch (rdev->family) {
590 case CHIP_TAHITI:
591 case CHIP_PITCAIRN:
592 case CHIP_VERDE:
593 force_no_swizzle = true;
594 break;
595 default:
596 force_no_swizzle = false;
597 break;
598 }
599 if (force_no_swizzle) {
600 bool last_backend_enabled = false;
601
602 force_no_swizzle = false;
603 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
604 if (((enabled_backends_mask >> i) & 1) == 1) {
605 if (last_backend_enabled)
606 force_no_swizzle = true;
607 last_backend_enabled = true;
608 } else
609 last_backend_enabled = false;
610 }
611 }
612
613 switch (num_tile_pipes) {
614 case 1:
615 case 3:
616 case 5:
617 case 7:
618 DRM_ERROR("odd number of pipes!\n");
619 break;
620 case 2:
621 swizzle_pipe[0] = 0;
622 swizzle_pipe[1] = 1;
623 break;
624 case 4:
625 if (force_no_swizzle) {
626 swizzle_pipe[0] = 0;
627 swizzle_pipe[1] = 1;
628 swizzle_pipe[2] = 2;
629 swizzle_pipe[3] = 3;
630 } else {
631 swizzle_pipe[0] = 0;
632 swizzle_pipe[1] = 2;
633 swizzle_pipe[2] = 1;
634 swizzle_pipe[3] = 3;
635 }
636 break;
637 case 6:
638 if (force_no_swizzle) {
639 swizzle_pipe[0] = 0;
640 swizzle_pipe[1] = 1;
641 swizzle_pipe[2] = 2;
642 swizzle_pipe[3] = 3;
643 swizzle_pipe[4] = 4;
644 swizzle_pipe[5] = 5;
645 } else {
646 swizzle_pipe[0] = 0;
647 swizzle_pipe[1] = 2;
648 swizzle_pipe[2] = 4;
649 swizzle_pipe[3] = 1;
650 swizzle_pipe[4] = 3;
651 swizzle_pipe[5] = 5;
652 }
653 break;
654 case 8:
655 if (force_no_swizzle) {
656 swizzle_pipe[0] = 0;
657 swizzle_pipe[1] = 1;
658 swizzle_pipe[2] = 2;
659 swizzle_pipe[3] = 3;
660 swizzle_pipe[4] = 4;
661 swizzle_pipe[5] = 5;
662 swizzle_pipe[6] = 6;
663 swizzle_pipe[7] = 7;
664 } else {
665 swizzle_pipe[0] = 0;
666 swizzle_pipe[1] = 2;
667 swizzle_pipe[2] = 4;
668 swizzle_pipe[3] = 6;
669 swizzle_pipe[4] = 1;
670 swizzle_pipe[5] = 3;
671 swizzle_pipe[6] = 5;
672 swizzle_pipe[7] = 7;
673 }
674 break;
675 }
676
677 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
678 while (((1 << cur_backend) & enabled_backends_mask) == 0)
679 cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
680
681 backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));
682
683 cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
684 }
685
686 return backend_map;
687}
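/* Worked example for si_get_tile_pipe_to_backend_map() above (illustrative,
 * assuming 8 tile pipes and all 8 backends enabled): each pipe owns one
 * nibble of backend_map naming the backend that serves it, so the
 * no-swizzle case used on these SI parts packs the identity mapping
 * 0x76543210, while the swizzled pipe order 0,2,4,6,1,3,5,7 would pack
 * 0x73625140.
 */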
688
689static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev,
690 u32 disable_mask_per_se,
691 u32 max_disable_mask_per_se,
692 u32 num_shader_engines)
693{
694 u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
695 u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;
696
697 if (num_shader_engines == 1)
698 return disable_mask_per_asic;
699 else if (num_shader_engines == 2)
700 return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
701 else
702 return 0xffffffff;
703}
704
705static void si_tiling_mode_table_init(struct radeon_device *rdev)
706{
707 const u32 num_tile_mode_states = 32;
708 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
709
710 switch (rdev->config.si.mem_row_size_in_kb) {
711 case 1:
712 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
713 break;
714 case 2:
715 default:
716 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
717 break;
718 case 4:
719 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
720 break;
721 }
722
723 if ((rdev->family == CHIP_TAHITI) ||
724 (rdev->family == CHIP_PITCAIRN)) {
725 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
726 switch (reg_offset) {
727 case 0: /* non-AA compressed depth or any compressed stencil */
728 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
729 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
730 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
731 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
732 NUM_BANKS(ADDR_SURF_16_BANK) |
733 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
736 break;
737 case 1: /* 2xAA/4xAA compressed depth only */
738 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
739 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
740 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
741 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
742 NUM_BANKS(ADDR_SURF_16_BANK) |
743 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
746 break;
747 case 2: /* 8xAA compressed depth only */
748 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
749 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
750 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
751 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
752 NUM_BANKS(ADDR_SURF_16_BANK) |
753 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
754 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
755 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
756 break;
757 case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
758 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
759 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
760 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
761 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
762 NUM_BANKS(ADDR_SURF_16_BANK) |
763 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
764 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
765 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
766 break;
767 case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
768 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
769 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
770 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
771 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
772 NUM_BANKS(ADDR_SURF_16_BANK) |
773 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
776 break;
777 case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
778 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
779 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
780 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
781 TILE_SPLIT(split_equal_to_row_size) |
782 NUM_BANKS(ADDR_SURF_16_BANK) |
783 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
784 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
785 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
786 break;
787 case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
788 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
789 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
790 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
791 TILE_SPLIT(split_equal_to_row_size) |
792 NUM_BANKS(ADDR_SURF_16_BANK) |
793 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
794 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
795 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
796 break;
797 case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
798 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
799 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
800 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
801 TILE_SPLIT(split_equal_to_row_size) |
802 NUM_BANKS(ADDR_SURF_16_BANK) |
803 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
804 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
805 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
806 break;
807 case 8: /* 1D and 1D Array Surfaces */
808 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
809 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
810 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
812 NUM_BANKS(ADDR_SURF_16_BANK) |
813 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
816 break;
817 case 9: /* Displayable maps. */
818 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
819 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
820 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
821 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
822 NUM_BANKS(ADDR_SURF_16_BANK) |
823 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
824 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
825 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
826 break;
827 case 10: /* Display 8bpp. */
828 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
829 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
830 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
831 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
832 NUM_BANKS(ADDR_SURF_16_BANK) |
833 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
834 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
835 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
836 break;
837 case 11: /* Display 16bpp. */
838 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
839 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
840 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
841 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
842 NUM_BANKS(ADDR_SURF_16_BANK) |
843 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
844 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
845 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
846 break;
847 case 12: /* Display 32bpp. */
848 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
849 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
850 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
851 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
852 NUM_BANKS(ADDR_SURF_16_BANK) |
853 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
854 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
855 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
856 break;
857 case 13: /* Thin. */
858 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
859 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
860 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
861 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
862 NUM_BANKS(ADDR_SURF_16_BANK) |
863 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
864 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
865 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
866 break;
867 case 14: /* Thin 8 bpp. */
868 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
869 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
870 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
871 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
872 NUM_BANKS(ADDR_SURF_16_BANK) |
873 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
874 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
875 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
876 break;
877 case 15: /* Thin 16 bpp. */
878 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
879 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
880 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
881 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
882 NUM_BANKS(ADDR_SURF_16_BANK) |
883 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
884 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
885 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
886 break;
887 case 16: /* Thin 32 bpp. */
888 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
889 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
890 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
891 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
892 NUM_BANKS(ADDR_SURF_16_BANK) |
893 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
894 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
895 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
896 break;
897 case 17: /* Thin 64 bpp. */
898 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
899 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
900 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
901 TILE_SPLIT(split_equal_to_row_size) |
902 NUM_BANKS(ADDR_SURF_16_BANK) |
903 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
906 break;
907 case 21: /* 8 bpp PRT. */
908 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
909 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
910 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
911 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
912 NUM_BANKS(ADDR_SURF_16_BANK) |
913 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
914 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
915 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
916 break;
917 case 22: /* 16 bpp PRT */
918 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
919 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
920 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
921 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
922 NUM_BANKS(ADDR_SURF_16_BANK) |
923 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
926 break;
927 case 23: /* 32 bpp PRT */
928 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
929 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
930 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
931 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
932 NUM_BANKS(ADDR_SURF_16_BANK) |
933 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
936 break;
937 case 24: /* 64 bpp PRT */
938 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
939 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
940 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
941 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
942 NUM_BANKS(ADDR_SURF_16_BANK) |
943 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
944 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
945 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
946 break;
947 case 25: /* 128 bpp PRT */
948 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
949 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
950 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
951 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
952 NUM_BANKS(ADDR_SURF_8_BANK) |
953 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
954 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
955 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
956 break;
957 default:
958 gb_tile_moden = 0;
959 break;
960 }
961 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
962 }
963 } else if (rdev->family == CHIP_VERDE) {
964 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
965 switch (reg_offset) {
966 case 0: /* non-AA compressed depth or any compressed stencil */
967 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
968 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
969 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
970 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
971 NUM_BANKS(ADDR_SURF_16_BANK) |
972 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
975 break;
976 case 1: /* 2xAA/4xAA compressed depth only */
977 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
978 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
979 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
981 NUM_BANKS(ADDR_SURF_16_BANK) |
982 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
983 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
984 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
985 break;
986 case 2: /* 8xAA compressed depth only */
987 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
988 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
989 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
990 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
991 NUM_BANKS(ADDR_SURF_16_BANK) |
992 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
995 break;
996 case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
998 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
999 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1001 NUM_BANKS(ADDR_SURF_16_BANK) |
1002 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1003 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1004 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1005 break;
1006 case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1007 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1008 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1009 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1010 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1011 NUM_BANKS(ADDR_SURF_16_BANK) |
1012 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1013 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1014 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1015 break;
1016 case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1017 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1018 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1019 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1020 TILE_SPLIT(split_equal_to_row_size) |
1021 NUM_BANKS(ADDR_SURF_16_BANK) |
1022 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1023 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1024 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1025 break;
1026 case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1027 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1028 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1029 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1030 TILE_SPLIT(split_equal_to_row_size) |
1031 NUM_BANKS(ADDR_SURF_16_BANK) |
1032 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1033 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1034 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1035 break;
1036 case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1037 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1038 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1039 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1040 TILE_SPLIT(split_equal_to_row_size) |
1041 NUM_BANKS(ADDR_SURF_16_BANK) |
1042 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1043 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1044 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1045 break;
1046 case 8: /* 1D and 1D Array Surfaces */
1047 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1048 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1049 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1050 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1051 NUM_BANKS(ADDR_SURF_16_BANK) |
1052 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1053 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1054 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1055 break;
1056 case 9: /* Displayable maps. */
1057 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1058 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1059 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1060 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1061 NUM_BANKS(ADDR_SURF_16_BANK) |
1062 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1063 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1064 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1065 break;
1066 case 10: /* Display 8bpp. */
1067 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1068 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1069 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1070 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1071 NUM_BANKS(ADDR_SURF_16_BANK) |
1072 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1073 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1074 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1075 break;
1076 case 11: /* Display 16bpp. */
1077 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1078 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1079 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1080 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1081 NUM_BANKS(ADDR_SURF_16_BANK) |
1082 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1083 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1084 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1085 break;
1086 case 12: /* Display 32bpp. */
1087 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1088 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1089 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1090 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1091 NUM_BANKS(ADDR_SURF_16_BANK) |
1092 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1095 break;
1096 case 13: /* Thin. */
1097 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1098 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1099 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1100 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1101 NUM_BANKS(ADDR_SURF_16_BANK) |
1102 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1103 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1104 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1105 break;
1106 case 14: /* Thin 8 bpp. */
1107 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1108 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1109 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1110 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1111 NUM_BANKS(ADDR_SURF_16_BANK) |
1112 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1115 break;
1116 case 15: /* Thin 16 bpp. */
1117 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1118 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1119 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1121 NUM_BANKS(ADDR_SURF_16_BANK) |
1122 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1125 break;
1126 case 16: /* Thin 32 bpp. */
1127 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1128 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1129 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1130 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1131 NUM_BANKS(ADDR_SURF_16_BANK) |
1132 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1133 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1134 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1135 break;
1136 case 17: /* Thin 64 bpp. */
1137 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1138 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1139 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1140 TILE_SPLIT(split_equal_to_row_size) |
1141 NUM_BANKS(ADDR_SURF_16_BANK) |
1142 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1145 break;
1146 case 21: /* 8 bpp PRT. */
1147 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1148 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1149 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1150 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1151 NUM_BANKS(ADDR_SURF_16_BANK) |
1152 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1155 break;
1156 case 22: /* 16 bpp PRT */
1157 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1158 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1159 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1160 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1161 NUM_BANKS(ADDR_SURF_16_BANK) |
1162 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1165 break;
1166 case 23: /* 32 bpp PRT */
1167 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1168 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1169 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1170 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1171 NUM_BANKS(ADDR_SURF_16_BANK) |
1172 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1175 break;
1176 case 24: /* 64 bpp PRT */
1177 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1178 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1179 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1180 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1181 NUM_BANKS(ADDR_SURF_16_BANK) |
1182 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1185 break;
1186 case 25: /* 128 bpp PRT */
1187 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1188 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1189 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1190 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1191 NUM_BANKS(ADDR_SURF_8_BANK) |
1192 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1195 break;
1196 default:
1197 gb_tile_moden = 0;
1198 break;
1199 }
1200 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1201 }
1202 } else
1203 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1204}
1205
1206static void si_gpu_init(struct radeon_device *rdev)
1207{
1208 u32 cc_rb_backend_disable = 0;
1209 u32 cc_gc_shader_array_config;
1210 u32 gb_addr_config = 0;
1211 u32 mc_shared_chmap, mc_arb_ramcfg;
1212 u32 gb_backend_map;
1213 u32 cgts_tcc_disable;
1214 u32 sx_debug_1;
1215 u32 gc_user_shader_array_config;
1216 u32 gc_user_rb_backend_disable;
1217 u32 cgts_user_tcc_disable;
1218 u32 hdp_host_path_cntl;
1219 u32 tmp;
1220 int i, j;
1221
1222 switch (rdev->family) {
1223 case CHIP_TAHITI:
1224 rdev->config.si.max_shader_engines = 2;
1225 rdev->config.si.max_pipes_per_simd = 4;
1226 rdev->config.si.max_tile_pipes = 12;
1227 rdev->config.si.max_simds_per_se = 8;
1228 rdev->config.si.max_backends_per_se = 4;
1229 rdev->config.si.max_texture_channel_caches = 12;
1230 rdev->config.si.max_gprs = 256;
1231 rdev->config.si.max_gs_threads = 32;
1232 rdev->config.si.max_hw_contexts = 8;
1233
1234 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1235 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1236 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1237 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1238 break;
1239 case CHIP_PITCAIRN:
1240 rdev->config.si.max_shader_engines = 2;
1241 rdev->config.si.max_pipes_per_simd = 4;
1242 rdev->config.si.max_tile_pipes = 8;
1243 rdev->config.si.max_simds_per_se = 5;
1244 rdev->config.si.max_backends_per_se = 4;
1245 rdev->config.si.max_texture_channel_caches = 8;
1246 rdev->config.si.max_gprs = 256;
1247 rdev->config.si.max_gs_threads = 32;
1248 rdev->config.si.max_hw_contexts = 8;
1249
1250 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1251 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1252 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1253 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1254 break;
1255 case CHIP_VERDE:
1256 default:
1257 rdev->config.si.max_shader_engines = 1;
1258 rdev->config.si.max_pipes_per_simd = 4;
1259 rdev->config.si.max_tile_pipes = 4;
1260 rdev->config.si.max_simds_per_se = 2;
1261 rdev->config.si.max_backends_per_se = 4;
1262 rdev->config.si.max_texture_channel_caches = 4;
1263 rdev->config.si.max_gprs = 256;
1264 rdev->config.si.max_gs_threads = 32;
1265 rdev->config.si.max_hw_contexts = 8;
1266
1267 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1268 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1269 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1270 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1271 break;
1272 }
1273
1274 /* Initialize HDP */
1275 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1276 WREG32((0x2c14 + j), 0x00000000);
1277 WREG32((0x2c18 + j), 0x00000000);
1278 WREG32((0x2c1c + j), 0x00000000);
1279 WREG32((0x2c20 + j), 0x00000000);
1280 WREG32((0x2c24 + j), 0x00000000);
1281 }
1282
1283 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1284
1285 evergreen_fix_pci_max_read_req_size(rdev);
1286
1287 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1288
1289 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1290 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1291
1292 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
1293 cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1294 cgts_tcc_disable = 0xffff0000;
1295 for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++)
1296 cgts_tcc_disable &= ~(1 << (16 + i));
1297 gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
1298 gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1299 cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);
1300
1301 rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines;
1302 rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1303 tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1304 rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp);
1305 tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1306 rdev->config.si.backend_disable_mask_per_asic =
1307 si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK,
1308 rdev->config.si.num_shader_engines);
1309 rdev->config.si.backend_map =
1310 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1311 rdev->config.si.num_backends_per_se *
1312 rdev->config.si.num_shader_engines,
1313 &rdev->config.si.backend_disable_mask_per_asic,
1314 rdev->config.si.num_shader_engines);
1315 tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
1316 rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp);
1317 rdev->config.si.mem_max_burst_length_bytes = 256;
1318 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1319 rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1320 if (rdev->config.si.mem_row_size_in_kb > 4)
1321 rdev->config.si.mem_row_size_in_kb = 4;
1322 /* XXX use MC settings? */
1323 rdev->config.si.shader_engine_tile_size = 32;
1324 rdev->config.si.num_gpus = 1;
1325 rdev->config.si.multi_gpu_tile_size = 64;
1326
1327 gb_addr_config = 0;
1328 switch (rdev->config.si.num_tile_pipes) {
1329 case 1:
1330 gb_addr_config |= NUM_PIPES(0);
1331 break;
1332 case 2:
1333 gb_addr_config |= NUM_PIPES(1);
1334 break;
1335 case 4:
1336 gb_addr_config |= NUM_PIPES(2);
1337 break;
1338 case 8:
1339 default:
1340 gb_addr_config |= NUM_PIPES(3);
1341 break;
1342 }
1343
1344 tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1;
1345 gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
1346 gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1);
1347 tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1;
1348 gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
1349 switch (rdev->config.si.num_gpus) {
1350 case 1:
1351 default:
1352 gb_addr_config |= NUM_GPUS(0);
1353 break;
1354 case 2:
1355 gb_addr_config |= NUM_GPUS(1);
1356 break;
1357 case 4:
1358 gb_addr_config |= NUM_GPUS(2);
1359 break;
1360 }
1361 switch (rdev->config.si.multi_gpu_tile_size) {
1362 case 16:
1363 gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
1364 break;
1365 case 32:
1366 default:
1367 gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
1368 break;
1369 case 64:
1370 gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
1371 break;
1372 case 128:
1373 gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
1374 break;
1375 }
1376 switch (rdev->config.si.mem_row_size_in_kb) {
1377 case 1:
1378 default:
1379 gb_addr_config |= ROW_SIZE(0);
1380 break;
1381 case 2:
1382 gb_addr_config |= ROW_SIZE(1);
1383 break;
1384 case 4:
1385 gb_addr_config |= ROW_SIZE(2);
1386 break;
1387 }
1388
1389 tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
1390 rdev->config.si.num_tile_pipes = (1 << tmp);
1391 tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
1392 rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256;
1393 tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
1394 rdev->config.si.num_shader_engines = tmp + 1;
1395 tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
1396 rdev->config.si.num_gpus = tmp + 1;
1397 tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
1398 rdev->config.si.multi_gpu_tile_size = 1 << tmp;
1399 tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
1400 rdev->config.si.mem_row_size_in_kb = 1 << tmp;
1401
1402 gb_backend_map =
1403 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1404 rdev->config.si.num_backends_per_se *
1405 rdev->config.si.num_shader_engines,
1406 &rdev->config.si.backend_disable_mask_per_asic,
1407 rdev->config.si.num_shader_engines);
1408
1409 /* setup tiling info dword. gb_addr_config is not adequate since it does
1410 * not have bank info, so create a custom tiling dword.
1411 * bits 3:0 num_pipes
1412 * bits 7:4 num_banks
1413 * bits 11:8 group_size
1414 * bits 15:12 row_size
1415 */
1416 rdev->config.si.tile_config = 0;
1417 switch (rdev->config.si.num_tile_pipes) {
1418 case 1:
1419 rdev->config.si.tile_config |= (0 << 0);
1420 break;
1421 case 2:
1422 rdev->config.si.tile_config |= (1 << 0);
1423 break;
1424 case 4:
1425 rdev->config.si.tile_config |= (2 << 0);
1426 break;
1427 case 8:
1428 default:
1429 /* XXX what about 12? */
1430 rdev->config.si.tile_config |= (3 << 0);
1431 break;
1432 }
1433 rdev->config.si.tile_config |=
1434 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
1435 rdev->config.si.tile_config |=
1436 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1437 rdev->config.si.tile_config |=
1438 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
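/* Worked example for the tiling info dword above (illustrative, assuming
 * 8 pipes, a NOOFBANK field of 2, a 256 byte pipe interleave (field 0) and
 * 2KB rows (field 1)): tile_config = 3 | (2 << 4) | (0 << 8) | (1 << 12)
 * = 0x1023.
 */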
1439
1440 rdev->config.si.backend_map = gb_backend_map;
1441 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1442 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1443 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1444
1445 /* primary versions */
1446 WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1447 WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1448 WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1449
1450 WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1451
1452 /* user versions */
1453 WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1454 WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1455 WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1456
1457 WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
1458
1459 si_tiling_mode_table_init(rdev);
1460
1461 /* set HW defaults for 3D engine */
1462 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1463 ROQ_IB2_START(0x2b)));
1464 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1465
1466 sx_debug_1 = RREG32(SX_DEBUG_1);
1467 WREG32(SX_DEBUG_1, sx_debug_1);
1468
1469 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1470
1471 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1472 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1473 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1474 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1475
1476 WREG32(VGT_NUM_INSTANCES, 1);
1477
1478 WREG32(CP_PERFMON_CNTL, 0);
1479
1480 WREG32(SQ_CONFIG, 0);
1481
1482 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1483 FORCE_EOV_MAX_REZ_CNT(255)));
1484
1485 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1486 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1487
1488 WREG32(VGT_GS_VERTEX_REUSE, 16);
1489 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1490
1491 WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1492 WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1493 WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1494 WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1495 WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1496 WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1497 WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1498 WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1499
1500 tmp = RREG32(HDP_MISC_CNTL);
1501 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1502 WREG32(HDP_MISC_CNTL, tmp);
1503
1504 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1505 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1506
1507 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1508
1509 udelay(50);
1510}