Commit | Line | Data |
---|---|---|
2483b4ea CK |
1 | /* |
2 | * Copyright 2010 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | * | |
22 | * Authors: Alex Deucher | |
23 | */ | |
24 | #include <drm/drmP.h> | |
25 | #include "radeon.h" | |
26 | #include "radeon_asic.h" | |
74d360f6 | 27 | #include "radeon_trace.h" |
2483b4ea CK |
28 | #include "nid.h" |
29 | ||
30 | u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev); | |
31 | ||
32 | /* | |
33 | * DMA | |
34 | * Starting with R600, the GPU has an asynchronous | |
35 | * DMA engine. The programming model is very similar | |
36 | * to the 3D engine (ring buffer, IBs, etc.), but the | |
37 | * DMA controller has its own packet format that is | |
38 | * different from the PM4 format used by the 3D engine. | |
39 | * It supports copying data, writing embedded data, | |
40 | * solid fills, and a number of other things. It also | |
41 | * has support for tiling/detiling of buffers. | |
42 | * Cayman and newer support two asynchronous DMA engines. | |
43 | */ | |
44 | ||
ea31bf69 AD |
45 | /** |
46 | * cayman_dma_get_rptr - get the current read pointer | |
47 | * | |
48 | * @rdev: radeon_device pointer | |
49 | * @ring: radeon ring pointer | |
50 | * | |
51 | * Get the current rptr from the hardware (cayman+). | |
52 | */ | |
53 | uint32_t cayman_dma_get_rptr(struct radeon_device *rdev, | |
54 | struct radeon_ring *ring) | |
55 | { | |
56 | u32 rptr, reg; | |
57 | ||
58 | if (rdev->wb.enabled) { | |
59 | rptr = rdev->wb.wb[ring->rptr_offs/4]; | |
60 | } else { | |
61 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) | |
62 | reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET; | |
63 | else | |
64 | reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET; | |
65 | ||
66 | rptr = RREG32(reg); | |
67 | } | |
68 | ||
69 | return (rptr & 0x3fffc) >> 2; | |
70 | } | |
71 | ||
72 | /** | |
73 | * cayman_dma_get_wptr - get the current write pointer | |
74 | * | |
75 | * @rdev: radeon_device pointer | |
76 | * @ring: radeon ring pointer | |
77 | * | |
78 | * Get the current wptr from the hardware (cayman+). | |
79 | */ | |
80 | uint32_t cayman_dma_get_wptr(struct radeon_device *rdev, | |
81 | struct radeon_ring *ring) | |
82 | { | |
83 | u32 reg; | |
84 | ||
85 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) | |
86 | reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; | |
87 | else | |
88 | reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; | |
89 | ||
90 | return (RREG32(reg) & 0x3fffc) >> 2; | |
91 | } | |
92 | ||
93 | /** | |
94 | * cayman_dma_set_wptr - commit the write pointer | |
95 | * | |
96 | * @rdev: radeon_device pointer | |
97 | * @ring: radeon ring pointer | |
98 | * | |
99 | * Write the wptr back to the hardware (cayman+). | |
100 | */ | |
101 | void cayman_dma_set_wptr(struct radeon_device *rdev, | |
102 | struct radeon_ring *ring) | |
103 | { | |
104 | u32 reg; | |
105 | ||
106 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) | |
107 | reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; | |
108 | else | |
109 | reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; | |
110 | ||
111 | WREG32(reg, (ring->wptr << 2) & 0x3fffc); | |
112 | } | |
113 | ||
2483b4ea CK |
114 | /** |
115 | * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine | |
116 | * | |
117 | * @rdev: radeon_device pointer | |
118 | * @ib: IB object to schedule | |
119 | * | |
120 | * Schedule an IB in the DMA ring (cayman-SI). | |
121 | */ | |
122 | void cayman_dma_ring_ib_execute(struct radeon_device *rdev, | |
123 | struct radeon_ib *ib) | |
124 | { | |
125 | struct radeon_ring *ring = &rdev->ring[ib->ring]; | |
7c42bc1a | 126 | unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; |
2483b4ea CK |
127 | |
128 | if (rdev->wb.enabled) { | |
129 | u32 next_rptr = ring->wptr + 4; | |
130 | while ((next_rptr & 7) != 5) | |
131 | next_rptr++; | |
132 | next_rptr += 3; | |
133 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); | |
134 | radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); | |
135 | radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); | |
136 | radeon_ring_write(ring, next_rptr); | |
137 | } | |
138 | ||
139 | /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. | |
140 | * Pad as necessary with NOPs. | |
141 | */ | |
142 | while ((ring->wptr & 7) != 5) | |
143 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); | |
7c42bc1a | 144 | radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); |
2483b4ea CK |
145 | radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); |
146 | radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); | |
147 | ||
148 | } | |
149 | ||
150 | /** | |
151 | * cayman_dma_stop - stop the async dma engines | |
152 | * | |
153 | * @rdev: radeon_device pointer | |
154 | * | |
155 | * Stop the async dma engines (cayman-SI). | |
156 | */ | |
157 | void cayman_dma_stop(struct radeon_device *rdev) | |
158 | { | |
159 | u32 rb_cntl; | |
160 | ||
50efa51a AD |
161 | if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) || |
162 | (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX)) | |
163 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); | |
2483b4ea CK |
164 | |
165 | /* dma0 */ | |
166 | rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); | |
167 | rb_cntl &= ~DMA_RB_ENABLE; | |
168 | WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); | |
169 | ||
170 | /* dma1 */ | |
171 | rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); | |
172 | rb_cntl &= ~DMA_RB_ENABLE; | |
173 | WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); | |
174 | ||
175 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; | |
176 | rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; | |
177 | } | |
178 | ||
179 | /** | |
180 | * cayman_dma_resume - setup and start the async dma engines | |
181 | * | |
182 | * @rdev: radeon_device pointer | |
183 | * | |
184 | * Set up the DMA ring buffers and enable them. (cayman-SI). | |
185 | * Returns 0 for success, error for failure. | |
186 | */ | |
187 | int cayman_dma_resume(struct radeon_device *rdev) | |
188 | { | |
189 | struct radeon_ring *ring; | |
190 | u32 rb_cntl, dma_cntl, ib_cntl; | |
191 | u32 rb_bufsz; | |
192 | u32 reg_offset, wb_offset; | |
193 | int i, r; | |
194 | ||
2483b4ea CK |
195 | for (i = 0; i < 2; i++) { |
196 | if (i == 0) { | |
197 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; | |
198 | reg_offset = DMA0_REGISTER_OFFSET; | |
199 | wb_offset = R600_WB_DMA_RPTR_OFFSET; | |
200 | } else { | |
201 | ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; | |
202 | reg_offset = DMA1_REGISTER_OFFSET; | |
203 | wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; | |
204 | } | |
205 | ||
206 | WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); | |
207 | WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); | |
208 | ||
209 | /* Set ring buffer size in dwords */ | |
9c725e5b | 210 | rb_bufsz = order_base_2(ring->ring_size / 4); |
2483b4ea CK |
211 | rb_cntl = rb_bufsz << 1; |
212 | #ifdef __BIG_ENDIAN | |
213 | rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; | |
214 | #endif | |
215 | WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); | |
216 | ||
217 | /* Initialize the ring buffer's read and write pointers */ | |
218 | WREG32(DMA_RB_RPTR + reg_offset, 0); | |
219 | WREG32(DMA_RB_WPTR + reg_offset, 0); | |
220 | ||
221 | /* set the wb address whether it's enabled or not */ | |
222 | WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, | |
223 | upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); | |
224 | WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, | |
225 | ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); | |
226 | ||
227 | if (rdev->wb.enabled) | |
228 | rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; | |
229 | ||
230 | WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); | |
231 | ||
232 | /* enable DMA IBs */ | |
233 | ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; | |
234 | #ifdef __BIG_ENDIAN | |
235 | ib_cntl |= DMA_IB_SWAP_ENABLE; | |
236 | #endif | |
237 | WREG32(DMA_IB_CNTL + reg_offset, ib_cntl); | |
238 | ||
239 | dma_cntl = RREG32(DMA_CNTL + reg_offset); | |
240 | dma_cntl &= ~CTXEMPTY_INT_ENABLE; | |
241 | WREG32(DMA_CNTL + reg_offset, dma_cntl); | |
242 | ||
243 | ring->wptr = 0; | |
244 | WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); | |
245 | ||
2483b4ea CK |
246 | WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); |
247 | ||
248 | ring->ready = true; | |
249 | ||
250 | r = radeon_ring_test(rdev, ring->idx, ring); | |
251 | if (r) { | |
252 | ring->ready = false; | |
253 | return r; | |
254 | } | |
255 | } | |
256 | ||
50efa51a AD |
257 | if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) || |
258 | (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX)) | |
259 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); | |
2483b4ea CK |
260 | |
261 | return 0; | |
262 | } | |
263 | ||
264 | /** | |
265 | * cayman_dma_fini - tear down the async dma engines | |
266 | * | |
267 | * @rdev: radeon_device pointer | |
268 | * | |
269 | * Stop the async dma engines and free the rings (cayman-SI). | |
270 | */ | |
271 | void cayman_dma_fini(struct radeon_device *rdev) | |
272 | { | |
273 | cayman_dma_stop(rdev); | |
274 | radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); | |
275 | radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); | |
276 | } | |
277 | ||
278 | /** | |
279 | * cayman_dma_is_lockup - Check if the DMA engine is locked up | |
280 | * | |
281 | * @rdev: radeon_device pointer | |
282 | * @ring: radeon_ring structure holding ring information | |
283 | * | |
284 | * Check if the async DMA engine is locked up. | |
285 | * Returns true if the engine appears to be locked up, false if not. | |
286 | */ | |
287 | bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) | |
288 | { | |
289 | u32 reset_mask = cayman_gpu_check_soft_reset(rdev); | |
290 | u32 mask; | |
291 | ||
292 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) | |
293 | mask = RADEON_RESET_DMA; | |
294 | else | |
295 | mask = RADEON_RESET_DMA1; | |
296 | ||
297 | if (!(reset_mask & mask)) { | |
ff212f25 | 298 | radeon_ring_lockup_update(rdev, ring); |
2483b4ea CK |
299 | return false; |
300 | } | |
2483b4ea CK |
301 | return radeon_ring_test_lockup(rdev, ring); |
302 | } | |
303 | ||
304 | /** | |
03f62abd CK |
305 | * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART |
306 | * | |
307 | * @rdev: radeon_device pointer | |
308 | * @ib: indirect buffer to fill with commands | |
309 | * @pe: addr of the page entry | |
310 | * @src: src addr where to copy from | |
311 | * @count: number of page entries to update | |
312 | * | |
313 | * Update PTEs by copying them from the GART using the DMA (cayman/TN). | |
314 | */ | |
315 | void cayman_dma_vm_copy_pages(struct radeon_device *rdev, | |
316 | struct radeon_ib *ib, | |
317 | uint64_t pe, uint64_t src, | |
318 | unsigned count) | |
319 | { | |
320 | unsigned ndw; | |
321 | ||
322 | while (count) { | |
323 | ndw = count * 2; | |
324 | if (ndw > 0xFFFFE) | |
325 | ndw = 0xFFFFE; | |
326 | ||
327 | ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, | |
328 | 0, 0, ndw); | |
329 | ib->ptr[ib->length_dw++] = lower_32_bits(pe); | |
330 | ib->ptr[ib->length_dw++] = lower_32_bits(src); | |
331 | ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; | |
332 | ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; | |
333 | ||
334 | pe += ndw * 4; | |
335 | src += ndw * 4; | |
336 | count -= ndw / 2; | |
337 | } | |
338 | } | |
339 | ||
340 | /** | |
341 | * cayman_dma_vm_write_pages - update PTEs by writing them manually | |
2483b4ea CK |
342 | * |
343 | * @rdev: radeon_device pointer | |
344 | * @ib: indirect buffer to fill with commands | |
345 | * @pe: addr of the page entry | |
346 | * @addr: dst addr to write into pe | |
347 | * @count: number of page entries to update | |
348 | * @incr: increase next addr by incr bytes | |
03f62abd | 349 | * @flags: hw access flags |
2483b4ea | 350 | * |
03f62abd | 351 | * Update PTEs by writing them manually using the DMA (cayman/TN). |
2483b4ea | 352 | */ |
03f62abd CK |
353 | void cayman_dma_vm_write_pages(struct radeon_device *rdev, |
354 | struct radeon_ib *ib, | |
355 | uint64_t pe, | |
356 | uint64_t addr, unsigned count, | |
357 | uint32_t incr, uint32_t flags) | |
2483b4ea | 358 | { |
2483b4ea CK |
359 | uint64_t value; |
360 | unsigned ndw; | |
361 | ||
03f62abd CK |
362 | while (count) { |
363 | ndw = count * 2; | |
364 | if (ndw > 0xFFFFE) | |
365 | ndw = 0xFFFFE; | |
366 | ||
367 | /* for non-physically contiguous pages (system) */ | |
368 | ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, | |
369 | 0, 0, ndw); | |
370 | ib->ptr[ib->length_dw++] = pe; | |
371 | ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; | |
372 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { | |
373 | if (flags & R600_PTE_SYSTEM) { | |
374 | value = radeon_vm_map_gart(rdev, addr); | |
03f62abd | 375 | } else if (flags & R600_PTE_VALID) { |
2483b4ea | 376 | value = addr; |
03f62abd | 377 | } else { |
2483b4ea | 378 | value = 0; |
03f62abd CK |
379 | } |
380 | addr += incr; | |
381 | value |= flags; | |
382 | ib->ptr[ib->length_dw++] = value; | |
2483b4ea | 383 | ib->ptr[ib->length_dw++] = upper_32_bits(value); |
2483b4ea CK |
384 | } |
385 | } | |
03f62abd CK |
386 | } |
387 | ||
388 | /** | |
389 | * cayman_dma_vm_set_pages - update the page tables using the DMA | |
390 | * | |
391 | * @rdev: radeon_device pointer | |
392 | * @ib: indirect buffer to fill with commands | |
393 | * @pe: addr of the page entry | |
394 | * @addr: dst addr to write into pe | |
395 | * @count: number of page entries to update | |
396 | * @incr: increase next addr by incr bytes | |
397 | * @flags: hw access flags | |
398 | * | |
399 | * Update the page tables using the DMA (cayman/TN). | |
400 | */ | |
401 | void cayman_dma_vm_set_pages(struct radeon_device *rdev, | |
402 | struct radeon_ib *ib, | |
403 | uint64_t pe, | |
404 | uint64_t addr, unsigned count, | |
405 | uint32_t incr, uint32_t flags) | |
406 | { | |
407 | uint64_t value; | |
408 | unsigned ndw; | |
409 | ||
410 | while (count) { | |
411 | ndw = count * 2; | |
412 | if (ndw > 0xFFFFE) | |
413 | ndw = 0xFFFFE; | |
414 | ||
415 | if (flags & R600_PTE_VALID) | |
416 | value = addr; | |
417 | else | |
418 | value = 0; | |
419 | ||
420 | /* for physically contiguous pages (vram) */ | |
421 | ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); | |
422 | ib->ptr[ib->length_dw++] = pe; /* dst addr */ | |
423 | ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; | |
424 | ib->ptr[ib->length_dw++] = flags; /* mask */ | |
425 | ib->ptr[ib->length_dw++] = 0; | |
426 | ib->ptr[ib->length_dw++] = value; /* value */ | |
427 | ib->ptr[ib->length_dw++] = upper_32_bits(value); | |
428 | ib->ptr[ib->length_dw++] = incr; /* increment size */ | |
429 | ib->ptr[ib->length_dw++] = 0; | |
430 | ||
431 | pe += ndw * 4; | |
432 | addr += (ndw / 2) * incr; | |
433 | count -= ndw / 2; | |
434 | } | |
435 | } | |
436 | ||
437 | /** | |
438 | * cayman_dma_vm_pad_ib - pad the IB to the required number of dw | |
439 | * | |
440 | * @ib: indirect buffer to fill with padding | |
441 | * | |
442 | */ | |
443 | void cayman_dma_vm_pad_ib(struct radeon_ib *ib) | |
444 | { | |
2483b4ea CK |
445 | while (ib->length_dw & 0x7) |
446 | ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); | |
447 | } | |
448 | ||
faffaf62 CK |
449 | void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, |
450 | unsigned vm_id, uint64_t pd_addr) | |
2483b4ea | 451 | { |
2483b4ea | 452 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); |
faffaf62 CK |
453 | radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2)); |
454 | radeon_ring_write(ring, pd_addr >> 12); | |
2483b4ea CK |
455 | |
456 | /* flush hdp cache */ | |
457 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); | |
458 | radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); | |
459 | radeon_ring_write(ring, 1); | |
460 | ||
461 | /* bits 0-7 are the VM contexts0-7 */ | |
462 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); | |
463 | radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); | |
faffaf62 | 464 | radeon_ring_write(ring, 1 << vm_id); |
cbfc35b9 AD |
465 | |
466 | /* wait for invalidate to complete */ | |
467 | radeon_ring_write(ring, DMA_SRBM_READ_PACKET); | |
468 | radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2)); | |
469 | radeon_ring_write(ring, 0); /* mask */ | |
470 | radeon_ring_write(ring, 0); /* value */ | |
2483b4ea CK |
471 | } |
472 |