/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_reg.h"
#include "radeon_trace.h"

/*
 * GART
 * The GART (Graphics Aperture Remapping Table) is an aperture
 * in the GPU's address space. System pages can be mapped into
 * the aperture and look like contiguous pages from the GPU's
 * perspective. A page table maps the pages in the aperture
 * to the actual backing pages in system memory.
 *
 * Radeon GPUs support both an internal GART, as described above,
 * and AGP. AGP works similarly, but the GART table is configured
 * and maintained by the northbridge rather than the driver.
 * Radeon hw has a separate AGP aperture that is programmed to
 * point to the AGP aperture provided by the northbridge and the
 * requests are passed through to the northbridge aperture.
 * Both AGP and internal GART can be used at the same time, however
 * that is not currently supported by the driver.
 *
 * This file handles the common internal GART management.
 */

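/*
 * Illustration of the CPU page vs. GPU page split used throughout this
 * file (a sketch with example numbers, not driver logic): the GART is
 * indexed in GPU pages of RADEON_GPU_PAGE_SIZE (4 KiB) while the kernel
 * hands us CPU pages of PAGE_SIZE. With 4 KiB CPU pages the mapping is
 * 1:1; on a hypothetical 64 KiB PAGE_SIZE build, one CPU page would back
 * PAGE_SIZE / RADEON_GPU_PAGE_SIZE = 16 consecutive GART entries, which
 * is exactly the ratio the inner 'j' loops in bind/unbind iterate over.
 */
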
/*
 * Common GART table functions.
 */
/**
 * radeon_gart_table_ram_alloc - allocate system ram for gart page table
 *
 * @rdev: radeon_device pointer
 *
 * Allocate system memory for GART page table
 * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the
 * gart table to be in system memory.
 * Returns 0 for success, -ENOMEM for failure.
 */
int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
{
	void *ptr;

	ptr = pci_alloc_consistent(rdev->pdev, rdev->gart.table_size,
				   &rdev->gart.table_addr);
	if (ptr == NULL) {
		return -ENOMEM;
	}
#ifdef CONFIG_X86
	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
	    rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
		set_memory_uc((unsigned long)ptr,
			      rdev->gart.table_size >> PAGE_SHIFT);
	}
#endif
	rdev->gart.ptr = ptr;
	memset((void *)rdev->gart.ptr, 0, rdev->gart.table_size);
	return 0;
}

/**
 * radeon_gart_table_ram_free - free system ram for gart page table
 *
 * @rdev: radeon_device pointer
 *
 * Free system memory for GART page table
 * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the
 * gart table to be in system memory.
 */
void radeon_gart_table_ram_free(struct radeon_device *rdev)
{
	if (rdev->gart.ptr == NULL) {
		return;
	}
#ifdef CONFIG_X86
	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
	    rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
		set_memory_wb((unsigned long)rdev->gart.ptr,
			      rdev->gart.table_size >> PAGE_SHIFT);
	}
#endif
	pci_free_consistent(rdev->pdev, rdev->gart.table_size,
			    (void *)rdev->gart.ptr,
			    rdev->gart.table_addr);
	rdev->gart.ptr = NULL;
	rdev->gart.table_addr = 0;
}

/**
 * radeon_gart_table_vram_alloc - allocate vram for gart page table
 *
 * @rdev: radeon_device pointer
 *
 * Allocate video memory for GART page table
 * (pcie r4xx, r5xx+). These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.robj == NULL) {
		r = radeon_bo_create(rdev, rdev->gart.table_size,
				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
				     NULL, &rdev->gart.robj);
		if (r) {
			return r;
		}
	}
	return 0;
}

/**
 * radeon_gart_table_vram_pin - pin gart page table in vram
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART page table in vram so it will not be moved
 * by the memory manager (pcie r4xx, r5xx+). These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int radeon_gart_table_vram_pin(struct radeon_device *rdev)
{
	uint64_t gpu_addr;
	int r;

	r = radeon_bo_reserve(rdev->gart.robj, false);
	if (unlikely(r != 0))
		return r;
	r = radeon_bo_pin(rdev->gart.robj,
			  RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->gart.robj);
		return r;
	}
	r = radeon_bo_kmap(rdev->gart.robj, &rdev->gart.ptr);
	if (r)
		radeon_bo_unpin(rdev->gart.robj);
	radeon_bo_unreserve(rdev->gart.robj);
	rdev->gart.table_addr = gpu_addr;
	return r;
}

/**
 * radeon_gart_table_vram_unpin - unpin gart page table in vram
 *
 * @rdev: radeon_device pointer
 *
 * Unpin the GART page table in vram (pcie r4xx, r5xx+).
 * These asics require the gart table to be in video memory.
 */
void radeon_gart_table_vram_unpin(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.robj == NULL) {
		return;
	}
	r = radeon_bo_reserve(rdev->gart.robj, false);
	if (likely(r == 0)) {
		radeon_bo_kunmap(rdev->gart.robj);
		radeon_bo_unpin(rdev->gart.robj);
		radeon_bo_unreserve(rdev->gart.robj);
		rdev->gart.ptr = NULL;
	}
}

/**
 * radeon_gart_table_vram_free - free gart page table vram
 *
 * @rdev: radeon_device pointer
 *
 * Free the video memory used for the GART page table
 * (pcie r4xx, r5xx+). These asics require the gart table to
 * be in video memory.
 */
void radeon_gart_table_vram_free(struct radeon_device *rdev)
{
	if (rdev->gart.robj == NULL) {
		return;
	}
	radeon_bo_unref(&rdev->gart.robj);
}

/*
 * Common gart functions.
 */
/**
 * radeon_gart_unbind - unbind pages from the gart page table
 *
 * @rdev: radeon_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to unbind
 *
 * Unbinds the requested pages from the gart page table and
 * replaces them with the dummy page (all asics).
 */
void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
			int pages)
{
	unsigned t;
	unsigned p;
	int i, j;
	u64 page_base;

	if (!rdev->gart.ready) {
		WARN(1, "trying to unbind memory from uninitialized GART!\n");
		return;
	}
	t = offset / RADEON_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
	for (i = 0; i < pages; i++, p++) {
		if (rdev->gart.pages[p]) {
			rdev->gart.pages[p] = NULL;
			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
			page_base = rdev->gart.pages_addr[p];
			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
				if (rdev->gart.ptr) {
					radeon_gart_set_page(rdev, t, page_base);
				}
				page_base += RADEON_GPU_PAGE_SIZE;
			}
		}
	}
	mb();
	radeon_gart_tlb_flush(rdev);
}

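/*
 * Worked example of the index math above (hypothetical values): with
 * offset = 0x10000, PAGE_SIZE = 4096 and RADEON_GPU_PAGE_SIZE = 4096,
 * t = 0x10000 / 4096 = 16 (first GPU page table slot) and
 * p = 16 / (4096 / 4096) = 16 (first CPU page slot). The same
 * translation is used by radeon_gart_bind() below.
 */
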
/**
 * radeon_gart_bind - bind pages into the gart page table
 *
 * @rdev: radeon_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to bind
 * @pagelist: pages to bind
 * @dma_addr: DMA addresses of pages
 *
 * Binds the requested pages to the gart page table
 * (all asics).
 * Returns 0 for success, -EINVAL for failure.
 */
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
		     int pages, struct page **pagelist, dma_addr_t *dma_addr)
{
	unsigned t;
	unsigned p;
	uint64_t page_base;
	int i, j;

	if (!rdev->gart.ready) {
		WARN(1, "trying to bind memory to uninitialized GART!\n");
		return -EINVAL;
	}
	t = offset / RADEON_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);

	for (i = 0; i < pages; i++, p++) {
		rdev->gart.pages_addr[p] = dma_addr[i];
		rdev->gart.pages[p] = pagelist[i];
		if (rdev->gart.ptr) {
			page_base = rdev->gart.pages_addr[p];
			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
				radeon_gart_set_page(rdev, t, page_base);
				page_base += RADEON_GPU_PAGE_SIZE;
			}
		}
	}
	mb();
	radeon_gart_tlb_flush(rdev);
	return 0;
}

301 | ||
03eec93b AD |
302 | /** |
303 | * radeon_gart_restore - bind all pages in the gart page table | |
304 | * | |
305 | * @rdev: radeon_device pointer | |
306 | * | |
307 | * Binds all pages in the gart page table (all asics). | |
308 | * Used to rebuild the gart table on device startup or resume. | |
309 | */ | |
82568565 DA |
310 | void radeon_gart_restore(struct radeon_device *rdev) |
311 | { | |
312 | int i, j, t; | |
313 | u64 page_base; | |
314 | ||
c9a1be96 JG |
315 | if (!rdev->gart.ptr) { |
316 | return; | |
317 | } | |
82568565 DA |
318 | for (i = 0, t = 0; i < rdev->gart.num_cpu_pages; i++) { |
319 | page_base = rdev->gart.pages_addr[i]; | |
320 | for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) { | |
321 | radeon_gart_set_page(rdev, t, page_base); | |
322 | page_base += RADEON_GPU_PAGE_SIZE; | |
323 | } | |
324 | } | |
325 | mb(); | |
326 | radeon_gart_tlb_flush(rdev); | |
327 | } | |
328 | ||
/**
 * radeon_gart_init - init the driver info for managing the gart
 *
 * @rdev: radeon_device pointer
 *
 * Allocate the dummy page and init the gart driver info (all asics).
 * Returns 0 for success, error for failure.
 */
int radeon_gart_init(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.pages) {
		return 0;
	}
	/* We need PAGE_SIZE >= RADEON_GPU_PAGE_SIZE */
	if (PAGE_SIZE < RADEON_GPU_PAGE_SIZE) {
		DRM_ERROR("Page size is smaller than GPU page size!\n");
		return -EINVAL;
	}
	r = radeon_dummy_page_init(rdev);
	if (r)
		return r;
	/* Compute table size */
	rdev->gart.num_cpu_pages = rdev->mc.gtt_size / PAGE_SIZE;
	rdev->gart.num_gpu_pages = rdev->mc.gtt_size / RADEON_GPU_PAGE_SIZE;
	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
		 rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages);
	/* Allocate pages table */
	rdev->gart.pages = vzalloc(sizeof(void *) * rdev->gart.num_cpu_pages);
	if (rdev->gart.pages == NULL) {
		radeon_gart_fini(rdev);
		return -ENOMEM;
	}
	rdev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) *
					rdev->gart.num_cpu_pages);
	if (rdev->gart.pages_addr == NULL) {
		radeon_gart_fini(rdev);
		return -ENOMEM;
	}
	/* set GART entry to point to the dummy page by default */
	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
	}
	return 0;
}

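/*
 * Sizing sketch (example numbers only): for a hypothetical 512 MiB GTT
 * with 4 KiB CPU and GPU pages, num_cpu_pages = num_gpu_pages = 131072,
 * so the pages array costs 131072 * sizeof(void *) = 1 MiB on 64-bit and
 * pages_addr another 1 MiB with an 8-byte dma_addr_t. Arrays that large
 * are why vzalloc() is used above; a contiguous kmalloc() of that size
 * may well fail.
 */
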
/**
 * radeon_gart_fini - tear down the driver info for managing the gart
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the gart driver info and free the dummy page (all asics).
 */
void radeon_gart_fini(struct radeon_device *rdev)
{
	if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) {
		/* unbind pages */
		radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages);
	}
	rdev->gart.ready = false;
	vfree(rdev->gart.pages);
	vfree(rdev->gart.pages_addr);
	rdev->gart.pages = NULL;
	rdev->gart.pages_addr = NULL;

	radeon_dummy_page_fini(rdev);
}

/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

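/*
 * Address translation sketch for the two-level layout described above,
 * using the helpers defined later in this file (illustrative only):
 *
 *   gpu_pfn = va / RADEON_GPU_PAGE_SIZE;
 *   pt_idx  = gpu_pfn >> RADEON_VM_BLOCK_SIZE;      (page directory slot)
 *   pte_idx = gpu_pfn & (RADEON_VM_PTE_COUNT - 1);  (slot within the PT)
 *
 * The page directory entry at pd_gpu_addr + pt_idx * 8 points to a page
 * table, and the PTE at offset pte_idx * 8 inside that table holds the
 * final physical address plus the R600_PTE_* flags.
 */
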
/*
 * vm helpers
 *
 * TODO bind a default page at vm initialization for default address
 */

/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
	return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
}

/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}

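/*
 * Example (hypothetical configuration): with max_pfn = 1 << 20 (a 4 GiB
 * address space in 4 KiB GPU pages) and RADEON_VM_BLOCK_SIZE = 9, the
 * directory holds (1 << 20) >> 9 = 2048 PDEs of 8 bytes each, so
 * radeon_vm_directory_size() returns 2048 * 8 = 16 KiB (page aligned).
 */
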
/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
	struct radeon_vm *vm;
	struct radeon_bo_va *bo_va;
	int r;
	unsigned size;

	if (!rdev->vm_manager.enabled) {
		/* allocate enough for 2 full VM pts */
		size = radeon_vm_directory_size(rdev);
		size += rdev->vm_manager.max_pfn * 8;
		size *= 2;
		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
					      RADEON_GPU_PAGE_ALIGN(size),
					      RADEON_VM_PTB_ALIGN_SIZE,
					      RADEON_GEM_DOMAIN_VRAM);
		if (r) {
			dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
				(rdev->vm_manager.max_pfn * 8) >> 10);
			return r;
		}

		r = radeon_asic_vm_init(rdev);
		if (r)
			return r;

		rdev->vm_manager.enabled = true;

		r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
		if (r)
			return r;
	}

	/* restore page table */
	list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
		if (vm->page_directory == NULL)
			continue;

		list_for_each_entry(bo_va, &vm->va, vm_list) {
			bo_va->valid = false;
		}
	}
	return 0;
}

/**
 * radeon_vm_free_pt - free the page table for a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm to unbind
 *
 * Free the page table of a specific vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_free_pt(struct radeon_device *rdev,
			      struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va;
	int i;

	if (!vm->page_directory)
		return;

	list_del_init(&vm->list);
	radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);

	list_for_each_entry(bo_va, &vm->va, vm_list) {
		bo_va->valid = false;
	}

	if (vm->page_tables == NULL)
		return;

	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
		radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);

	kfree(vm->page_tables);
}

/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	struct radeon_vm *vm, *tmp;
	int i;

	if (!rdev->vm_manager.enabled)
		return;

	mutex_lock(&rdev->vm_manager.lock);
	/* free all allocated page tables */
	list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
		mutex_lock(&vm->mutex);
		radeon_vm_free_pt(rdev, vm);
		mutex_unlock(&vm->mutex);
	}
	for (i = 0; i < RADEON_NUM_VM; ++i) {
		radeon_fence_unref(&rdev->vm_manager.active[i]);
	}
	radeon_asic_vm_fini(rdev);
	mutex_unlock(&rdev->vm_manager.lock);

	radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
	radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
	rdev->vm_manager.enabled = false;
}

/**
 * radeon_vm_evict - evict page table to make room for new one
 *
 * @rdev: radeon_device pointer
 * @vm: VM we want to allocate something for
 *
 * Evict a VM from the lru, making sure that it isn't @vm (cayman+).
 * Returns 0 for success, -ENOMEM for failure.
 *
 * Global and local mutex must be locked!
 */
static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_vm *vm_evict;

	if (list_empty(&rdev->vm_manager.lru_vm))
		return -ENOMEM;

	vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
				    struct radeon_vm, list);
	if (vm_evict == vm)
		return -ENOMEM;

	mutex_lock(&vm_evict->mutex);
	radeon_vm_free_pt(rdev, vm_evict);
	mutex_unlock(&vm_evict->mutex);
	return 0;
}

/**
 * radeon_vm_alloc_pt - allocates a page table for a VM
 *
 * @rdev: radeon_device pointer
 * @vm: vm to bind
 *
 * Allocate a page table for the requested vm (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
{
	unsigned pd_size, pd_entries, pts_size;
	struct radeon_ib ib;
	int r;

	if (vm == NULL) {
		return -EINVAL;
	}

	if (vm->page_directory != NULL) {
		return 0;
	}

	pd_size = radeon_vm_directory_size(rdev);
	pd_entries = radeon_vm_num_pdes(rdev);

retry:
	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
			     &vm->page_directory, pd_size,
			     RADEON_VM_PTB_ALIGN_SIZE, false);
	if (r == -ENOMEM) {
		r = radeon_vm_evict(rdev, vm);
		if (r)
			return r;
		goto retry;

	} else if (r) {
		return r;
	}

	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);

	/* Initially clear the page directory */
	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
			  NULL, pd_entries * 2 + 64);
	if (r) {
		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
		return r;
	}

	ib.length_dw = 0;

	radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
				0, pd_entries, 0, 0);

	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
		return r;
	}
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);
	radeon_fence_unref(&vm->last_flush);

	/* allocate page table array */
	pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);

	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
		return -ENOMEM;
	}

	return 0;
}

/**
 * radeon_vm_add_to_lru - add a VM's page table to the LRU list
 *
 * @rdev: radeon_device pointer
 * @vm: vm to add to LRU
 *
 * Add the allocated page table to the LRU list (cayman+).
 *
 * Global mutex must be locked!
 */
void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
{
	list_del_init(&vm->list);
	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
}

/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any).
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
				       struct radeon_vm *vm, int ring)
{
	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid */
	if (vm->fence && vm->fence == rdev->vm_manager.active[vm->id])
		return NULL;

	/* we definitely need to flush */
	radeon_fence_unref(&vm->last_flush);

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
		struct radeon_fence *fence = rdev->vm_manager.active[i];

		if (fence == NULL) {
			/* found a free one */
			vm->id = i;
			return NULL;
		}

		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
			best[fence->ring] = fence;
			choices[fence->ring == ring ? 0 : 1] = i;
		}
	}

	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			vm->id = choices[i];
			trace_radeon_vm_grab_id(vm->id, ring);
			return rdev->vm_manager.active[choices[i]];
		}
	}

	/* should never happen */
	BUG();
	return NULL;
}

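/*
 * Note on the two 'choices' slots above: choices[0] tracks the id whose
 * previous user's fence is on the ring we are about to submit to, while
 * choices[1] tracks one from any other ring. Preferring choices[0] means
 * the returned fence belongs to the same ring, where ordering is implicit
 * in submission, presumably avoiding a cross-ring semaphore wait.
 */
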
/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_fence(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_fence *fence)
{
	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);

	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(fence);
}

/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
				       struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
				      struct radeon_vm *vm,
				      struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->soffset = 0;
	bo_va->eoffset = 0;
	bo_va->flags = 0;
	bo_va->valid = false;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->vm_list);

	mutex_lock(&vm->mutex);
	list_add(&bo_va->vm_list, &vm->va);
	list_add_tail(&bo_va->bo_list, &bo->va);
	mutex_unlock(&vm->mutex);

	return bo_va;
}

/**
 * radeon_vm_bo_set_addr - set bo's virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
			  struct radeon_bo_va *bo_va,
			  uint64_t soffset,
			  uint32_t flags)
{
	uint64_t size = radeon_bo_size(bo_va->bo);
	uint64_t eoffset, last_offset = 0;
	struct radeon_vm *vm = bo_va->vm;
	struct radeon_bo_va *tmp;
	struct list_head *head;
	unsigned last_pfn;

	if (soffset) {
		/* make sure object fits at this offset */
		eoffset = soffset + size;
		if (soffset >= eoffset) {
			return -EINVAL;
		}

		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
		if (last_pfn > rdev->vm_manager.max_pfn) {
			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
				last_pfn, rdev->vm_manager.max_pfn);
			return -EINVAL;
		}

	} else {
		eoffset = last_pfn = 0;
	}

	mutex_lock(&vm->mutex);
	head = &vm->va;
	last_offset = 0;
	list_for_each_entry(tmp, &vm->va, vm_list) {
		if (bo_va == tmp) {
			/* skip over currently modified bo */
			continue;
		}

		if (soffset >= last_offset && eoffset <= tmp->soffset) {
			/* bo can be added before this one */
			break;
		}
		if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
				bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
				(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
			mutex_unlock(&vm->mutex);
			return -EINVAL;
		}
		last_offset = tmp->eoffset;
		head = &tmp->vm_list;
	}

	bo_va->soffset = soffset;
	bo_va->eoffset = eoffset;
	bo_va->flags = flags;
	bo_va->valid = false;
	list_move(&bo_va->vm_list, head);

	mutex_unlock(&vm->mutex);
	return 0;
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	return result;
}

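/*
 * Worked example (hypothetical values): for addr = 0x12345 with a 4 KiB
 * PAGE_SIZE (PAGE_SHIFT = 12), the lookup reads pages_addr[0x12] and ORs
 * back the low bits 0x345, so GPU pages smaller than the CPU page size
 * still resolve to the right sub-page offset.
 */
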
/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
	uint32_t hw_flags = 0;
	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
	if (flags & RADEON_VM_PAGE_SYSTEM) {
		hw_flags |= R600_PTE_SYSTEM;
		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
	}
	return hw_flags;
}

/**
 * radeon_vm_update_pdes - make sure that page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to fill with commands
 * @start: start of GPU address range
 * @end: end of GPU address range
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
static int radeon_vm_update_pdes(struct radeon_device *rdev,
				 struct radeon_vm *vm,
				 struct radeon_ib *ib,
				 uint64_t start, uint64_t end)
{
	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;

	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0;
	uint64_t pt_idx;
	int r;

	start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
	end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;

	/* walk over the address space and update the page directory */
	for (pt_idx = start; pt_idx <= end; ++pt_idx) {
		uint64_t pde, pt;

		if (vm->page_tables[pt_idx])
			continue;

retry:
		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
				     &vm->page_tables[pt_idx],
				     RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, false);

		if (r == -ENOMEM) {
			r = radeon_vm_evict(rdev, vm);
			if (r)
				return r;
			goto retry;
		} else if (r) {
			return r;
		}

		pde = vm->pd_gpu_addr + pt_idx * 8;

		pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);

		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				radeon_asic_vm_set_page(rdev, ib, last_pde,
							last_pt, count, incr,
							R600_PTE_VALID);

				count *= RADEON_VM_PTE_COUNT;
				radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
							count, 0, 0);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count) {
		radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
					incr, R600_PTE_VALID);

		count *= RADEON_VM_PTE_COUNT;
		radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
					count, 0, 0);
	}

	return 0;
}

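/*
 * The run batching above works like this (illustrative trace): if three
 * newly allocated page tables land at consecutive sub-allocator offsets,
 * their PDEs sit 8 bytes apart and the PT addresses 'incr' bytes apart,
 * so the loop emits one radeon_asic_vm_set_page() call with count = 3
 * instead of three separate ones, followed by a single call that clears
 * all 3 * RADEON_VM_PTE_COUNT freshly allocated PTEs.
 */
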
/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to fill with commands
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_update_ptes(struct radeon_device *rdev,
				  struct radeon_vm *vm,
				  struct radeon_ib *ib,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint32_t flags)
{
	static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;

	uint64_t last_pte = ~0, last_dst = ~0;
	unsigned count = 0;
	uint64_t addr;

	start = start / RADEON_GPU_PAGE_SIZE;
	end = end / RADEON_GPU_PAGE_SIZE;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
		unsigned nptes;
		uint64_t pte;

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = RADEON_VM_PTE_COUNT - (addr & mask);

		pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
		pte += (addr & mask) * 8;

		if ((last_pte + 8 * count) != pte) {

			if (count) {
				radeon_asic_vm_set_page(rdev, ib, last_pte,
							last_dst, count,
							RADEON_GPU_PAGE_SIZE,
							flags);
			}

			count = nptes;
			last_pte = pte;
			last_dst = dst;
		} else {
			count += nptes;
		}

		addr += nptes;
		dst += nptes * RADEON_GPU_PAGE_SIZE;
	}

	if (count) {
		radeon_asic_vm_set_page(rdev, ib, last_pte,
					last_dst, count,
					RADEON_GPU_PAGE_SIZE, flags);
	}
}

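/*
 * Chunking example for the nptes computation above (assuming
 * RADEON_VM_BLOCK_SIZE = 9, i.e. 512 PTEs per table, for illustration):
 * updating GPU pages 510..1029 is split into 2 PTEs from the first
 * table, 512 from the second and 6 from the third; runs whose PTE
 * addresses happen to be contiguous across tables are still merged
 * into a single radeon_asic_vm_set_page() call by the 'count' logic.
 */
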
/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and global and local mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
			struct radeon_vm *vm,
			struct radeon_bo *bo,
			struct ttm_mem_reg *mem)
{
	struct radeon_ib ib;
	struct radeon_bo_va *bo_va;
	unsigned nptes, npdes, ndw;
	uint64_t addr;
	int r;

	/* nothing to do if vm isn't bound */
	if (vm->page_directory == NULL)
		return 0;

	bo_va = radeon_vm_bo_find(vm, bo);
	if (bo_va == NULL) {
		dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
		return -EINVAL;
	}

	if (!bo_va->soffset) {
		dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
			bo, vm);
		return -EINVAL;
	}

	if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
		return 0;

	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	if (mem) {
		addr = mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_SYSTEM) {
			bo_va->flags |= RADEON_VM_PAGE_VALID;
			bo_va->valid = true;
		}
		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
		} else {
			addr += rdev->vm_manager.vram_base_offset;
		}
	} else {
		addr = 0;
		bo_va->valid = false;
	}

	trace_radeon_vm_bo_update(bo_va);

	nptes = radeon_bo_ngpu_pages(bo);

	/* assume two extra pdes in case the mapping overlaps the borders */
	npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;

	/* padding, etc. */
	ndw = 64;

	if (RADEON_VM_BLOCK_SIZE > 11)
		/* reserve space for one header for every 2k dwords */
		ndw += (nptes >> 11) * 4;
	else
		/* reserve space for one header for
		   every (1 << BLOCK_SIZE) entries */
		ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;

	/* reserve space for pte addresses */
	ndw += nptes * 2;

	/* reserve space for one header for every 2k dwords */
	ndw += (npdes >> 11) * 4;

	/* reserve space for pde addresses */
	ndw += npdes * 2;

	/* reserve space for clearing new page tables */
	ndw += npdes * 2 * RADEON_VM_PTE_COUNT;

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}

	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
			      addr, radeon_vm_page_flags(bo_va->flags));

	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);
	radeon_fence_unref(&vm->last_flush);

	return 0;
}

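/*
 * IB size sketch for the ndw budget above (hypothetical numbers, assuming
 * RADEON_VM_BLOCK_SIZE = 9): a 1 MiB bo has nptes = 256 and
 * npdes = (256 >> 9) + 2 = 2, giving ndw = 64 + 0 + 256 * 2 + 0 + 2 * 2 +
 * 2 * 2 * 512 = 2628 dwords, i.e. a roughly 10 KiB indirect buffer,
 * comfortably below the 0xfffff dword limit checked above.
 */
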
/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
 * remove the ptes for @bo_va in the page table.
 * Returns 0 for success.
 *
 * Object has to be reserved!
 */
int radeon_vm_bo_rmv(struct radeon_device *rdev,
		     struct radeon_bo_va *bo_va)
{
	int r = 0;

	mutex_lock(&rdev->vm_manager.lock);
	mutex_lock(&bo_va->vm->mutex);
	if (bo_va->soffset) {
		r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
	}
	mutex_unlock(&rdev->vm_manager.lock);
	list_del(&bo_va->vm_list);
	mutex_unlock(&bo_va->vm->mutex);
	list_del(&bo_va->bo_list);

	kfree(bo_va);
	return r;
}

/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
			     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		bo_va->valid = false;
	}
}

/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
 */
void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
	vm->id = 0;
	vm->fence = NULL;
	mutex_init(&vm->mutex);
	INIT_LIST_HEAD(&vm->list);
	INIT_LIST_HEAD(&vm->va);
}

/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list.
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	mutex_lock(&rdev->vm_manager.lock);
	mutex_lock(&vm->mutex);
	radeon_vm_free_pt(rdev, vm);
	mutex_unlock(&rdev->vm_manager.lock);

	if (!list_empty(&vm->va)) {
		dev_err(rdev->dev, "still active bo inside vm\n");
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
		list_del_init(&bo_va->vm_list);
		r = radeon_bo_reserve(bo_va->bo, false);
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_bo_unreserve(bo_va->bo);
			kfree(bo_va);
		}
	}
	radeon_fence_unref(&vm->fence);
	radeon_fence_unref(&vm->last_flush);
	mutex_unlock(&vm->mutex);
}