/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
				    struct ttm_mem_reg *mem)
{
	if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size)
		return 0;

	return ((mem->start << PAGE_SHIFT) + mem->size) >
		adev->mc.visible_vram_size ?
		adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
		mem->size;
}
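
/*
 * Worked example (illustrative note, not from the original file): with
 * visible_vram_size of 256 MiB, a 2 MiB buffer whose first page sits at
 * the 255 MiB mark has only its first 1 MiB CPU-visible, so the function
 * returns 1 MiB; a buffer starting at or beyond 256 MiB returns 0.
 */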

static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
				       struct ttm_mem_reg *old_mem,
				       struct ttm_mem_reg *new_mem)
{
	u64 vis_size;

	if (!adev)
		return;

	if (new_mem) {
		switch (new_mem->mem_type) {
		case TTM_PL_TT:
			atomic64_add(new_mem->size, &adev->gtt_usage);
			break;
		case TTM_PL_VRAM:
			atomic64_add(new_mem->size, &adev->vram_usage);
			vis_size = amdgpu_get_vis_part_size(adev, new_mem);
			atomic64_add(vis_size, &adev->vram_vis_usage);
			break;
		}
	}

	if (old_mem) {
		switch (old_mem->mem_type) {
		case TTM_PL_TT:
			atomic64_sub(old_mem->size, &adev->gtt_usage);
			break;
		case TTM_PL_VRAM:
			atomic64_sub(old_mem->size, &adev->vram_usage);
			vis_size = amdgpu_get_vis_part_size(adev, old_mem);
			atomic64_sub(vis_size, &adev->vram_vis_usage);
			break;
		}
	}
}

static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
	struct amdgpu_bo *bo;

	bo = container_of(tbo, struct amdgpu_bo, tbo);

	amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);

	drm_gem_object_release(&bo->gem_base);
	amdgpu_bo_unref(&bo->parent);
	if (!list_empty(&bo->shadow_list)) {
		mutex_lock(&adev->shadow_list_lock);
		list_del_init(&bo->shadow_list);
		mutex_unlock(&adev->shadow_list_lock);
	}
	kfree(bo->metadata);
	kfree(bo);
}

bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &amdgpu_ttm_bo_destroy)
		return true;
	return false;
}

static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
				      struct ttm_placement *placement,
				      struct ttm_place *places,
				      u32 domain, u64 flags)
{
	u32 c = 0;

	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
		unsigned lpfn = 0;

		/* This forces a reallocation if the flag wasn't set before */
		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
			lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;

		places[c].fpfn = 0;
		places[c].lpfn = lpfn;
		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
			TTM_PL_FLAG_VRAM;
		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			places[c].lpfn = visible_pfn;
		else
			places[c].flags |= TTM_PL_FLAG_TOPDOWN;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_TT;
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
			places[c].flags |= TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED;
		else
			places[c].flags |= TTM_PL_FLAG_CACHED;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_CPU) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_SYSTEM;
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
			places[c].flags |= TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED;
		else
			places[c].flags |= TTM_PL_FLAG_CACHED;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GDS) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GWS) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_OA) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA;
		c++;
	}

	if (!c) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
		c++;
	}

	placement->num_placement = c;
	placement->placement = places;

	placement->num_busy_placement = c;
	placement->busy_placement = places;
}

void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);

	amdgpu_ttm_placement_init(adev, &abo->placement, abo->placements,
				  domain, abo->flags);
}
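
/*
 * Illustrative sketch (not part of the original driver): migrating a BO is
 * done by rewriting its placement and revalidating, just as the fault
 * handler below does. The caller must hold the BO's reservation. Kept out
 * of the build via "#if 0"; the "example_" name is hypothetical.
 */
#if 0
static int example_move_to_gtt(struct amdgpu_bo *abo)
{
	/* Request GTT only; TTM migrates the BO on the next validate. */
	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
	return ttm_bo_validate(&abo->tbo, &abo->placement, false, false);
}
#endif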

static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
					struct ttm_placement *placement)
{
	BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1));

	memcpy(bo->placements, placement->placement,
	       placement->num_placement * sizeof(struct ttm_place));
	bo->placement.num_placement = placement->num_placement;
	bo->placement.num_busy_placement = placement->num_busy_placement;
	bo->placement.placement = bo->placements;
	bo->placement.busy_placement = bo->placements;
}

/**
 * amdgpu_bo_create_kernel - create BO for kernel use
 *
 * @adev: amdgpu device object
 * @size: size for the new BO
 * @align: alignment for the new BO
 * @domain: where to place it
 * @bo_ptr: resulting BO
 * @gpu_addr: GPU addr of the pinned BO
 * @cpu_addr: optional CPU address mapping
 *
 * Allocates and pins a BO for kernel internal use.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
			    unsigned long size, int align,
			    u32 domain, struct amdgpu_bo **bo_ptr,
			    u64 *gpu_addr, void **cpu_addr)
{
	int r;

	r = amdgpu_bo_create(adev, size, align, true, domain,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
			     NULL, NULL, bo_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r);
		return r;
	}

	r = amdgpu_bo_reserve(*bo_ptr, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r);
		goto error_free;
	}

	r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
		goto error_unreserve;
	}

	if (cpu_addr) {
		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
			goto error_unreserve;
		}
	}

	amdgpu_bo_unreserve(*bo_ptr);

	return 0;

error_unreserve:
	amdgpu_bo_unreserve(*bo_ptr);

error_free:
	amdgpu_bo_unref(bo_ptr);

	return r;
}

/**
 * amdgpu_bo_free_kernel - free BO for kernel use
 *
 * @bo: amdgpu BO to free
 * @gpu_addr: optional GPU address, cleared on return
 * @cpu_addr: optional CPU address mapping, unmapped and cleared on return
 *
 * Unmaps, unpins and frees a BO that was allocated for kernel internal use.
 */
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
			   void **cpu_addr)
{
	if (*bo == NULL)
		return;

	if (likely(amdgpu_bo_reserve(*bo, false) == 0)) {
		if (cpu_addr)
			amdgpu_bo_kunmap(*bo);

		amdgpu_bo_unpin(*bo);
		amdgpu_bo_unreserve(*bo);
	}
	amdgpu_bo_unref(bo);

	if (gpu_addr)
		*gpu_addr = 0;

	if (cpu_addr)
		*cpu_addr = NULL;
}
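
/*
 * Illustrative usage sketch (not part of the original driver): allocating
 * a page-sized, pinned, CPU-mapped kernel BO and releasing it again. Kept
 * out of the build via "#if 0"; the "example_" name is hypothetical.
 */
#if 0
static int example_alloc_scratch(struct amdgpu_device *adev)
{
	struct amdgpu_bo *scratch_bo = NULL;
	u64 scratch_gpu_addr;
	void *scratch_cpu_addr;
	int r;

	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &scratch_bo,
				    &scratch_gpu_addr, &scratch_cpu_addr);
	if (r)
		return r;

	/* The BO is pinned and mapped; the CPU pointer is safe to use. */
	memset(scratch_cpu_addr, 0, PAGE_SIZE);

	amdgpu_bo_free_kernel(&scratch_bo, &scratch_gpu_addr,
			      &scratch_cpu_addr);
	return 0;
}
#endif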

int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
				unsigned long size, int byte_align,
				bool kernel, u32 domain, u64 flags,
				struct sg_table *sg,
				struct ttm_placement *placement,
				struct reservation_object *resv,
				struct amdgpu_bo **bo_ptr)
{
	struct amdgpu_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align;
	size_t acc_size;
	int r;

	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	r = drm_gem_object_init(adev->ddev, &bo->gem_base, size);
	if (unlikely(r)) {
		kfree(bo);
		return r;
	}
	INIT_LIST_HEAD(&bo->shadow_list);
	INIT_LIST_HEAD(&bo->va);
	bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
					 AMDGPU_GEM_DOMAIN_GTT |
					 AMDGPU_GEM_DOMAIN_CPU |
					 AMDGPU_GEM_DOMAIN_GDS |
					 AMDGPU_GEM_DOMAIN_GWS |
					 AMDGPU_GEM_DOMAIN_OA);
	bo->allowed_domains = bo->prefered_domains;
	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;

	bo->flags = flags;

	/* For architectures that don't support WC memory,
	 * mask out the WC flag from the BO
	 */
	if (!drm_arch_can_wc_memory())
		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;

	amdgpu_fill_placement_to_bo(bo, placement);
	/* Kernel allocations are uninterruptible */

	if (!resv) {
		bool locked;

		reservation_object_init(&bo->tbo.ttm_resv);
		locked = ww_mutex_trylock(&bo->tbo.ttm_resv.lock);
		WARN_ON(!locked);
	}
	r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
			&bo->placement, page_align, !kernel, NULL,
			acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
			&amdgpu_ttm_bo_destroy);
	if (unlikely(r != 0))
		return r;

	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
		struct dma_fence *fence;

		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
		if (unlikely(r))
			goto fail_unreserve;

		amdgpu_bo_fence(bo, fence, false);
		dma_fence_put(bo->tbo.moving);
		bo->tbo.moving = dma_fence_get(fence);
		dma_fence_put(fence);
	}
	if (!resv)
		ww_mutex_unlock(&bo->tbo.resv->lock);
	*bo_ptr = bo;

	trace_amdgpu_bo_create(bo);

	return 0;

fail_unreserve:
	ww_mutex_unlock(&bo->tbo.resv->lock);
	amdgpu_bo_unref(&bo);
	return r;
}

static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
				   unsigned long size, int byte_align,
				   struct amdgpu_bo *bo)
{
	struct ttm_placement placement = {0};
	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
	int r;

	if (bo->shadow)
		return 0;

	bo->flags |= AMDGPU_GEM_CREATE_SHADOW;
	memset(&placements, 0,
	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));

	amdgpu_ttm_placement_init(adev, &placement,
				  placements, AMDGPU_GEM_DOMAIN_GTT,
				  AMDGPU_GEM_CREATE_CPU_GTT_USWC);

	r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
					AMDGPU_GEM_DOMAIN_GTT,
					AMDGPU_GEM_CREATE_CPU_GTT_USWC,
					NULL, &placement,
					bo->tbo.resv,
					&bo->shadow);
	if (!r) {
		bo->shadow->parent = amdgpu_bo_ref(bo);
		mutex_lock(&adev->shadow_list_lock);
		list_add_tail(&bo->shadow_list, &adev->shadow_list);
		mutex_unlock(&adev->shadow_list_lock);
	}

	return r;
}

int amdgpu_bo_create(struct amdgpu_device *adev,
		     unsigned long size, int byte_align,
		     bool kernel, u32 domain, u64 flags,
		     struct sg_table *sg,
		     struct reservation_object *resv,
		     struct amdgpu_bo **bo_ptr)
{
	struct ttm_placement placement = {0};
	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
	int r;

	memset(&placements, 0,
	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));

	amdgpu_ttm_placement_init(adev, &placement,
				  placements, domain, flags);

	r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
					domain, flags, sg, &placement,
					resv, bo_ptr);
	if (r)
		return r;

	if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) {
		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
		if (r)
			amdgpu_bo_unref(bo_ptr);
	}

	return r;
}

int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
			       struct amdgpu_ring *ring,
			       struct amdgpu_bo *bo,
			       struct reservation_object *resv,
			       struct dma_fence **fence,
			       bool direct)
{
	struct amdgpu_bo *shadow = bo->shadow;
	uint64_t bo_addr, shadow_addr;
	int r;

	if (!shadow)
		return -EINVAL;

	bo_addr = amdgpu_bo_gpu_offset(bo);
	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		goto err;

	r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
			       amdgpu_bo_size(bo), resv, fence,
			       direct);
	if (!r)
		amdgpu_bo_fence(bo, *fence, true);

err:
	return r;
}

int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_bo *bo,
				  struct reservation_object *resv,
				  struct dma_fence **fence,
				  bool direct)
{
	struct amdgpu_bo *shadow = bo->shadow;
	uint64_t bo_addr, shadow_addr;
	int r;

	if (!shadow)
		return -EINVAL;

	bo_addr = amdgpu_bo_gpu_offset(bo);
	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		goto err;

	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
			       amdgpu_bo_size(bo), resv, fence,
			       direct);
	if (!r)
		amdgpu_bo_fence(bo, *fence, true);

err:
	return r;
}
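
/*
 * Illustrative sketch (not part of the original driver): a GPU-reset path
 * might restore a BO from its GTT shadow roughly like this. Kept out of
 * the build via "#if 0"; the "example_" name is hypothetical.
 */
#if 0
static int example_recover_bo(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring,
			      struct amdgpu_bo *bo)
{
	struct dma_fence *fence = NULL;
	int r;

	r = amdgpu_bo_restore_from_shadow(adev, ring, bo, bo->tbo.resv,
					  &fence, false);
	if (r)
		return r;

	/* Wait for the copy so the BO is valid before it is used again. */
	if (fence)
		r = dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return r;
}
#endif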

int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
	bool is_iomem;
	long r;

	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
		return -EPERM;

	if (bo->kptr) {
		if (ptr)
			*ptr = bo->kptr;

		return 0;
	}

	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
						MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;

	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r)
		return r;

	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (ptr)
		*ptr = bo->kptr;

	return 0;
}

void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
{
	if (bo->kptr == NULL)
		return;
	bo->kptr = NULL;
	ttm_bo_kunmap(&bo->kmap);
}
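
/*
 * Illustrative sketch (not part of the original driver): CPU access to a
 * BO through amdgpu_bo_kmap()/amdgpu_bo_kunmap(). The BO must be reserved
 * around the access. Kept out of the build via "#if 0"; the "example_"
 * name is hypothetical.
 */
#if 0
static int example_cpu_write(struct amdgpu_bo *bo, u32 value)
{
	void *cpu_ptr;
	int r;

	r = amdgpu_bo_reserve(bo, false);
	if (r)
		return r;

	r = amdgpu_bo_kmap(bo, &cpu_ptr);
	if (!r) {
		*(u32 *)cpu_ptr = value;
		amdgpu_bo_kunmap(bo);
	}

	amdgpu_bo_unreserve(bo);
	return r;
}
#endif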

struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_reference(&bo->tbo);
	return bo;
}

void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
	struct ttm_buffer_object *tbo;

	if ((*bo) == NULL)
		return;

	tbo = &((*bo)->tbo);
	ttm_bo_unref(&tbo);
	if (tbo == NULL)
		*bo = NULL;
}

int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
			     u64 min_offset, u64 max_offset,
			     u64 *gpu_addr)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	int r, i;
	unsigned fpfn, lpfn;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
		return -EPERM;

	if (WARN_ON_ONCE(min_offset > max_offset))
		return -EINVAL;

	if (bo->pin_count) {
		uint32_t mem_type = bo->tbo.mem.mem_type;

		if (domain != amdgpu_mem_type_to_domain(mem_type))
			return -EINVAL;

		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = amdgpu_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset;
			WARN_ON_ONCE(max_offset <
				     (amdgpu_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}

	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	amdgpu_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		/* force pinning into visible video RAM */
		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) &&
		    (!max_offset || max_offset >
		     adev->mc.visible_vram_size)) {
			if (WARN_ON_ONCE(min_offset >
					 adev->mc.visible_vram_size))
				return -EINVAL;
			fpfn = min_offset >> PAGE_SHIFT;
			lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
		} else {
			fpfn = min_offset >> PAGE_SHIFT;
			lpfn = max_offset >> PAGE_SHIFT;
		}
		if (fpfn > bo->placements[i].fpfn)
			bo->placements[i].fpfn = fpfn;
		if (!bo->placements[i].lpfn ||
		    (lpfn && lpfn < bo->placements[i].lpfn))
			bo->placements[i].lpfn = lpfn;
		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p pin failed\n", bo);
		goto error;
	}
	r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto error;
	}

	bo->pin_count = 1;
	if (gpu_addr != NULL)
		*gpu_addr = amdgpu_bo_gpu_offset(bo);
	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
		adev->vram_pin_size += amdgpu_bo_size(bo);
		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
			adev->invisible_pin_size += amdgpu_bo_size(bo);
	} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		adev->gart_pin_size += amdgpu_bo_size(bo);
	}

error:
	return r;
}

int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
{
	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
}
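
/*
 * Illustrative sketch (not part of the original driver): pinning a scanout
 * BO into VRAM so it keeps a stable GPU address while being displayed.
 * Kept out of the build via "#if 0"; the "example_" name is hypothetical.
 */
#if 0
static int example_pin_scanout(struct amdgpu_bo *bo, u64 *gpu_addr)
{
	int r;

	r = amdgpu_bo_reserve(bo, false);
	if (r)
		return r;

	/* Scanout needs a fixed VRAM address until the BO is unpinned. */
	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM, gpu_addr);
	amdgpu_bo_unreserve(bo);

	/* ... on teardown, reserve again and call amdgpu_bo_unpin(bo). */
	return r;
}
#endif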

int amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	int r, i;

	if (!bo->pin_count) {
		dev_warn(adev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++) {
		bo->placements[i].lpfn = 0;
		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
	}
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p validate failed for unpin\n", bo);
		goto error;
	}

	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
		adev->vram_pin_size -= amdgpu_bo_size(bo);
		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
			adev->invisible_pin_size -= amdgpu_bo_size(bo);
	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
		adev->gart_pin_size -= amdgpu_bo_size(bo);
	}

error:
	return r;
}

int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
	if (0 && (adev->flags & AMD_IS_APU)) {
		/* Useless to evict on IGP chips */
		return 0;
	}
	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}

static const char *amdgpu_vram_names[] = {
	"UNKNOWN",
	"GDDR1",
	"DDR2",
	"GDDR3",
	"GDDR4",
	"GDDR5",
	"HBM",
	"DDR3"
};

int amdgpu_bo_init(struct amdgpu_device *adev)
{
	/* reserve PAT memory space to WC for VRAM */
	arch_io_reserve_memtype_wc(adev->mc.aper_base,
				   adev->mc.aper_size);

	/* Add an MTRR for the VRAM */
	adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
					      adev->mc.aper_size);
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		 adev->mc.mc_vram_size >> 20,
		 (unsigned long long)adev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %s\n",
		 adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
	return amdgpu_ttm_init(adev);
}

void amdgpu_bo_fini(struct amdgpu_device *adev)
{
	amdgpu_ttm_fini(adev);
	arch_phys_wc_del(adev->mc.vram_mtrr);
	arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
}

int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
			 struct vm_area_struct *vma)
{
	return ttm_fbdev_mmap(vma, &bo->tbo);
}

int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
	if (AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
		return -EINVAL;

	bo->tiling_flags = tiling_flags;
	return 0;
}

void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
}

int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata,
			   uint32_t metadata_size, uint64_t flags)
{
	void *buffer;

	if (!metadata_size) {
		if (bo->metadata_size) {
			kfree(bo->metadata);
			bo->metadata = NULL;
			bo->metadata_size = 0;
		}
		return 0;
	}

	if (metadata == NULL)
		return -EINVAL;

	buffer = kmemdup(metadata, metadata_size, GFP_KERNEL);
	if (buffer == NULL)
		return -ENOMEM;

	kfree(bo->metadata);
	bo->metadata_flags = flags;
	bo->metadata = buffer;
	bo->metadata_size = metadata_size;

	return 0;
}

int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
			   size_t buffer_size, uint32_t *metadata_size,
			   uint64_t *flags)
{
	if (!buffer && !metadata_size)
		return -EINVAL;

	if (buffer) {
		if (buffer_size < bo->metadata_size)
			return -EINVAL;

		if (bo->metadata_size)
			memcpy(buffer, bo->metadata, bo->metadata_size);
	}

	if (metadata_size)
		*metadata_size = bo->metadata_size;
	if (flags)
		*flags = bo->metadata_flags;

	return 0;
}

void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
			   struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	struct ttm_mem_reg *old_mem = &bo->mem;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return;

	abo = container_of(bo, struct amdgpu_bo, tbo);
	amdgpu_vm_bo_invalidate(adev, abo);

	/* update statistics */
	if (!new_mem)
		return;

	/* move_notify is called before move happens */
	amdgpu_update_memory_usage(adev, &bo->mem, new_mem);

	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}

int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	unsigned long offset, size, lpfn;
	int i, r;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return 0;

	abo = container_of(bo, struct amdgpu_bo, tbo);
	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	/* TODO: figure out how to map scattered VRAM to the CPU */
	if ((offset + size) <= adev->mc.visible_vram_size &&
	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
		return 0;

	/* Can't move a pinned BO to visible VRAM */
	if (abo->pin_count > 0)
		return -EINVAL;

	/* hurrah, the memory is not visible! */
	abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
	lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
	for (i = 0; i < abo->placement.num_placement; i++) {
		/* Force into visible VRAM */
		if ((abo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    (!abo->placements[i].lpfn ||
		     abo->placements[i].lpfn > lpfn))
			abo->placements[i].lpfn = lpfn;
	}
	r = ttm_bo_validate(bo, &abo->placement, false, false);
	if (unlikely(r == -ENOMEM)) {
		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
		return ttm_bo_validate(bo, &abo->placement, false, false);
	} else if (unlikely(r != 0)) {
		return r;
	}

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if ((offset + size) > adev->mc.visible_vram_size)
		return -EINVAL;

	return 0;
}

/**
 * amdgpu_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
		     bool shared)
{
	struct reservation_object *resv = bo->tbo.resv;

	if (shared)
		reservation_object_add_shared_fence(resv, fence);
	else
		reservation_object_add_excl_fence(resv, fence);
}

/**
 * amdgpu_bo_gpu_offset - return GPU offset of bo
 * @bo: amdgpu object for which we query the offset
 *
 * Returns current GPU offset of the object.
 *
 * Note: object should either be pinned or reserved when calling this
 * function; it might be useful to add a check for this for debugging.
 */
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
		     !amdgpu_ttm_is_bound(bo->tbo.ttm));
	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
		     !bo->pin_count);
	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));

	return bo->tbo.offset;
}