/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <ttm/ttm_bo_api.h>
#include <ttm/ttm_bo_driver.h>
#include <ttm/ttm_placement.h>
#include <ttm/ttm_module.h>
#include <ttm/ttm_page_alloc.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swiotlb.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include "amdgpu.h"
#include "bif/bif_4_1_d.h"

#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)

static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);

/*
 * Global memory.
 */
static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
{
	return ttm_mem_global_init(ref->object);
}

static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
{
	ttm_mem_global_release(ref->object);
}

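/*
 * amdgpu_ttm_global_init - Set up the global TTM state.
 *
 * Takes references on the TTM memory accounting and BO global objects
 * and creates the scheduler entity used for buffer moves on the
 * buffer_funcs ring. Undone by amdgpu_ttm_global_fini().
 */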
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
	struct drm_global_reference *global_ref;
	struct amdgpu_ring *ring;
	struct amd_sched_rq *rq;
	int r;

	adev->mman.mem_global_referenced = false;
	global_ref = &adev->mman.mem_global_ref;
	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
	global_ref->size = sizeof(struct ttm_mem_global);
	global_ref->init = &amdgpu_ttm_mem_global_init;
	global_ref->release = &amdgpu_ttm_mem_global_release;
	r = drm_global_item_ref(global_ref);
	if (r) {
		DRM_ERROR("Failed setting up TTM memory accounting subsystem.\n");
		goto error_mem;
	}

	adev->mman.bo_global_ref.mem_glob =
		adev->mman.mem_global_ref.object;
	global_ref = &adev->mman.bo_global_ref.ref;
	global_ref->global_type = DRM_GLOBAL_TTM_BO;
	global_ref->size = sizeof(struct ttm_bo_global);
	global_ref->init = &ttm_bo_global_init;
	global_ref->release = &ttm_bo_global_release;
	r = drm_global_item_ref(global_ref);
	if (r) {
		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
		goto error_bo;
	}

	ring = adev->mman.buffer_funcs_ring;
	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
	r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
				  rq, amdgpu_sched_jobs);
	if (r) {
		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
		goto error_entity;
	}

	adev->mman.mem_global_referenced = true;

	return 0;

error_entity:
	drm_global_item_unref(&adev->mman.bo_global_ref.ref);
error_bo:
	drm_global_item_unref(&adev->mman.mem_global_ref);
error_mem:
	return r;
}

static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
	if (adev->mman.mem_global_referenced) {
		amd_sched_entity_fini(adev->mman.entity.sched,
				      &adev->mman.entity);
		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
		drm_global_item_unref(&adev->mman.mem_global_ref);
		adev->mman.mem_global_referenced = false;
	}
}

static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
{
	return 0;
}

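/*
 * amdgpu_init_mem_type - Describe each memory domain to TTM.
 *
 * Sets up the manager, caching options and flags for the SYSTEM, TT
 * (GTT), VRAM and on-chip GDS/GWS/OA placements.
 */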
static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
				struct ttm_mem_type_manager *man)
{
	struct amdgpu_device *adev;

	adev = amdgpu_ttm_adev(bdev);

	switch (type) {
	case TTM_PL_SYSTEM:
		/* System memory */
		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_MASK_CACHING;
		man->default_caching = TTM_PL_FLAG_CACHED;
		break;
	case TTM_PL_TT:
		man->func = &amdgpu_gtt_mgr_func;
		man->gpu_offset = adev->mc.gtt_start;
		man->available_caching = TTM_PL_MASK_CACHING;
		man->default_caching = TTM_PL_FLAG_CACHED;
		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
		break;
	case TTM_PL_VRAM:
		/* "On-card" video ram */
		man->func = &amdgpu_vram_mgr_func;
		man->gpu_offset = adev->mc.vram_start;
		man->flags = TTM_MEMTYPE_FLAG_FIXED |
			     TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
		man->default_caching = TTM_PL_FLAG_WC;
		break;
	case AMDGPU_PL_GDS:
	case AMDGPU_PL_GWS:
	case AMDGPU_PL_OA:
		/* On-chip GDS memory */
		man->func = &ttm_bo_manager_func;
		man->gpu_offset = 0;
		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
		man->available_caching = TTM_PL_FLAG_UNCACHED;
		man->default_caching = TTM_PL_FLAG_UNCACHED;
		break;
	default:
		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
		return -EINVAL;
	}
	return 0;
}

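/*
 * amdgpu_evict_flags - Choose placements for a BO that is being evicted.
 *
 * VRAM BOs are evicted to GTT, limited to the currently usable GTT
 * aperture so address space is allocated for them right away, or
 * straight to system memory when the buffer move ring is not usable;
 * everything else is evicted to CPU-accessible system memory.
 */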
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
			       struct ttm_placement *placement)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	static struct ttm_place placements = {
		.fpfn = 0,
		.lpfn = 0,
		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
	};
	unsigned i;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
		placement->placement = &placements;
		placement->busy_placement = &placements;
		placement->num_placement = 1;
		placement->num_busy_placement = 1;
		return;
	}
	abo = container_of(bo, struct amdgpu_bo, tbo);
	switch (bo->mem.mem_type) {
	case TTM_PL_VRAM:
		if (adev->mman.buffer_funcs &&
		    adev->mman.buffer_funcs_ring &&
		    adev->mman.buffer_funcs_ring->ready == false) {
			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
		} else {
			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
			for (i = 0; i < abo->placement.num_placement; ++i) {
				if (!(abo->placements[i].flags &
				      TTM_PL_FLAG_TT))
					continue;

				if (abo->placements[i].lpfn)
					continue;

				/* set an upper limit to force directly
				 * allocating address space for the BO.
				 */
				abo->placements[i].lpfn =
					adev->mc.gtt_size >> PAGE_SHIFT;
			}
		}
		break;
	case TTM_PL_TT:
	default:
		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
	}
	*placement = abo->placement;
}

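/*
 * amdgpu_verify_access - Gate CPU mappings of a BO.
 *
 * Userptr BOs may never be mapped through the device file; for
 * everything else defer to the DRM VMA offset manager's access check.
 */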
static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);

	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
		return -EPERM;
	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
					  filp->private_data);
}

static void amdgpu_move_null(struct ttm_buffer_object *bo,
			     struct ttm_mem_reg *new_mem)
{
	struct ttm_mem_reg *old_mem = &bo->mem;

	BUG_ON(old_mem->mm_node != NULL);
	*old_mem = *new_mem;
	new_mem->mm_node = NULL;
}

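/*
 * amdgpu_mm_node_addr - Compute the GPU address of a drm_mm_node.
 *
 * For GTT nodes the BO is bound first so the node actually has a valid
 * offset; both GTT and VRAM then add the manager's gpu_offset.
 */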
static int amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
			       struct drm_mm_node *mm_node,
			       struct ttm_mem_reg *mem,
			       uint64_t *addr)
{
	int r;

	switch (mem->mem_type) {
	case TTM_PL_TT:
		r = amdgpu_ttm_bind(bo, mem);
		if (r)
			return r;
		/* fall through */
	case TTM_PL_VRAM:
		*addr = mm_node->start << PAGE_SHIFT;
		*addr += bo->bdev->man[mem->mem_type].gpu_offset;
		break;
	default:
		DRM_ERROR("Unknown placement %d\n", mem->mem_type);
		return -EINVAL;
	}

	return 0;
}

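/*
 * amdgpu_move_blit - Copy a BO with the GPU.
 *
 * Walks the drm_mm nodes of the old and new placement in parallel and
 * issues one copy per overlapping chunk, chaining the fences so only
 * the last one matters; the final fence is handed to
 * ttm_bo_pipeline_move() so the move completes asynchronously.
 */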
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
			    bool evict, bool no_wait_gpu,
			    struct ttm_mem_reg *new_mem,
			    struct ttm_mem_reg *old_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;

	struct drm_mm_node *old_mm, *new_mm;
	uint64_t old_start, old_size, new_start, new_size;
	unsigned long num_pages;
	struct dma_fence *fence = NULL;
	int r;

	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);

	if (!ring->ready) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	old_mm = old_mem->mm_node;
	r = amdgpu_mm_node_addr(bo, old_mm, old_mem, &old_start);
	if (r)
		return r;
	old_size = old_mm->size;

	new_mm = new_mem->mm_node;
	r = amdgpu_mm_node_addr(bo, new_mm, new_mem, &new_start);
	if (r)
		return r;
	new_size = new_mm->size;

	num_pages = new_mem->num_pages;
	while (num_pages) {
		unsigned long cur_pages = min(old_size, new_size);
		struct dma_fence *next;

		r = amdgpu_copy_buffer(ring, old_start, new_start,
				       cur_pages * PAGE_SIZE,
				       bo->resv, &next, false);
		if (r)
			goto error;

		dma_fence_put(fence);
		fence = next;

		num_pages -= cur_pages;
		if (!num_pages)
			break;

		old_size -= cur_pages;
		if (!old_size) {
			r = amdgpu_mm_node_addr(bo, ++old_mm, old_mem,
						&old_start);
			if (r)
				goto error;
			old_size = old_mm->size;
		} else {
			old_start += cur_pages * PAGE_SIZE;
		}

		new_size -= cur_pages;
		if (!new_size) {
			r = amdgpu_mm_node_addr(bo, ++new_mm, new_mem,
						&new_start);
			if (r)
				goto error;

			new_size = new_mm->size;
		} else {
			new_start += cur_pages * PAGE_SIZE;
		}
	}

	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
	dma_fence_put(fence);
	return r;

error:
	if (fence)
		dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return r;
}

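/*
 * amdgpu_move_vram_ram / amdgpu_move_ram_vram - Two-step moves through GTT.
 *
 * A VRAM<->system move cannot be done in one hop: the BO first gets a
 * temporary GTT placement the GPU can reach, is blitted, and then
 * handed to ttm_bo_move_ttm() for the remaining bind/unbind step.
 */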
static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
				bool evict, bool interruptible,
				bool no_wait_gpu,
				struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev;
	struct ttm_mem_reg *old_mem = &bo->mem;
	struct ttm_mem_reg tmp_mem;
	struct ttm_place placements;
	struct ttm_placement placement;
	int r;

	adev = amdgpu_ttm_adev(bo->bdev);
	tmp_mem = *new_mem;
	tmp_mem.mm_node = NULL;
	placement.num_placement = 1;
	placement.placement = &placements;
	placement.num_busy_placement = 1;
	placement.busy_placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
			     interruptible, no_wait_gpu);
	if (unlikely(r)) {
		return r;
	}

	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
	if (unlikely(r)) {
		goto out_cleanup;
	}

	r = ttm_tt_bind(bo->ttm, &tmp_mem);
	if (unlikely(r)) {
		goto out_cleanup;
	}
	r = amdgpu_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem);
	if (unlikely(r)) {
		goto out_cleanup;
	}
	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
out_cleanup:
	ttm_bo_mem_put(bo, &tmp_mem);
	return r;
}

static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
				bool evict, bool interruptible,
				bool no_wait_gpu,
				struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev;
	struct ttm_mem_reg *old_mem = &bo->mem;
	struct ttm_mem_reg tmp_mem;
	struct ttm_placement placement;
	struct ttm_place placements;
	int r;

	adev = amdgpu_ttm_adev(bo->bdev);
	tmp_mem = *new_mem;
	tmp_mem.mm_node = NULL;
	placement.num_placement = 1;
	placement.placement = &placements;
	placement.num_busy_placement = 1;
	placement.busy_placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
			     interruptible, no_wait_gpu);
	if (unlikely(r)) {
		return r;
	}
	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
	if (unlikely(r)) {
		goto out_cleanup;
	}
	r = amdgpu_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);
	if (unlikely(r)) {
		goto out_cleanup;
	}
out_cleanup:
	ttm_bo_mem_put(bo, &tmp_mem);
	return r;
}

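/*
 * amdgpu_bo_move - TTM move callback.
 *
 * Handles the cheap cases inline (fresh system BOs and TT<->system
 * moves that only need a bind), blits whatever the GPU can, and falls
 * back to ttm_bo_move_memcpy() when no copy ring is usable.
 */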
static int amdgpu_bo_move(struct ttm_buffer_object *bo,
			  bool evict, bool interruptible,
			  bool no_wait_gpu,
			  struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev;
	struct amdgpu_bo *abo;
	struct ttm_mem_reg *old_mem = &bo->mem;
	int r;

	/* Can't move a pinned BO */
	abo = container_of(bo, struct amdgpu_bo, tbo);
	if (WARN_ON_ONCE(abo->pin_count > 0))
		return -EINVAL;

	adev = amdgpu_ttm_adev(bo->bdev);

	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
		amdgpu_move_null(bo, new_mem);
		return 0;
	}
	if ((old_mem->mem_type == TTM_PL_TT &&
	     new_mem->mem_type == TTM_PL_SYSTEM) ||
	    (old_mem->mem_type == TTM_PL_SYSTEM &&
	     new_mem->mem_type == TTM_PL_TT)) {
		/* bind is enough */
		amdgpu_move_null(bo, new_mem);
		return 0;
	}
	if (adev->mman.buffer_funcs == NULL ||
	    adev->mman.buffer_funcs_ring == NULL ||
	    !adev->mman.buffer_funcs_ring->ready) {
		/* use memcpy */
		goto memcpy;
	}

	if (old_mem->mem_type == TTM_PL_VRAM &&
	    new_mem->mem_type == TTM_PL_SYSTEM) {
		r = amdgpu_move_vram_ram(bo, evict, interruptible,
					 no_wait_gpu, new_mem);
	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
		   new_mem->mem_type == TTM_PL_VRAM) {
		r = amdgpu_move_ram_vram(bo, evict, interruptible,
					 no_wait_gpu, new_mem);
	} else {
		r = amdgpu_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
	}

	if (r) {
memcpy:
		r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
		if (r) {
			return r;
		}
	}

	/* update statistics */
	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
	return 0;
}

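/*
 * amdgpu_ttm_io_mem_reserve - Describe the I/O aperture backing a placement.
 *
 * Only VRAM is iomem, and only its CPU-visible part may be mapped;
 * GTT and system memory are reached through pages instead.
 */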
static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);

	mem->bus.addr = NULL;
	mem->bus.offset = 0;
	mem->bus.size = mem->num_pages << PAGE_SHIFT;
	mem->bus.base = 0;
	mem->bus.is_iomem = false;
	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
		return -EINVAL;
	switch (mem->mem_type) {
	case TTM_PL_SYSTEM:
		/* system memory */
		return 0;
	case TTM_PL_TT:
		break;
	case TTM_PL_VRAM:
		mem->bus.offset = mem->start << PAGE_SHIFT;
		/* check if it's visible */
		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
			return -EINVAL;
		mem->bus.base = adev->mc.aper_base;
		mem->bus.is_iomem = true;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
}

static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
					   unsigned long page_offset)
{
	struct drm_mm_node *mm = bo->mem.mm_node;
	uint64_t size = mm->size;
	uint64_t offset = page_offset;

	page_offset = do_div(offset, size);
	mm += offset;
	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
}

/*
 * TTM backend functions.
 */
struct amdgpu_ttm_gup_task_list {
	struct list_head	list;
	struct task_struct	*task;
};

struct amdgpu_ttm_tt {
	struct ttm_dma_tt	ttm;
	struct amdgpu_device	*adev;
	u64			offset;
	uint64_t		userptr;
	struct mm_struct	*usermm;
	uint32_t		userflags;
	spinlock_t		guptasklock;
	struct list_head	guptasks;
	atomic_t		mmu_invalidations;
	struct list_head	list;
};

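/*
 * amdgpu_ttm_tt_get_user_pages - Pin the pages backing a userptr BO.
 *
 * Loops over get_user_pages() until all pages are pinned, registering
 * the calling task on gtt->guptasks so concurrent MMU invalidations
 * can tell that this task is the one doing the pinning.
 */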
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned int flags = 0;
	unsigned pinned = 0;
	int r;

	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
		flags |= FOLL_WRITE;

	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
		/* check that we only use anonymous memory
		   to prevent problems with writeback */
		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
		struct vm_area_struct *vma;

		vma = find_vma(gtt->usermm, gtt->userptr);
		if (!vma || vma->vm_file || vma->vm_end < end)
			return -EPERM;
	}

	do {
		unsigned num_pages = ttm->num_pages - pinned;
		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
		struct page **p = pages + pinned;
		struct amdgpu_ttm_gup_task_list guptask;

		guptask.task = current;
		spin_lock(&gtt->guptasklock);
		list_add(&guptask.list, &gtt->guptasks);
		spin_unlock(&gtt->guptasklock);

		r = get_user_pages(userptr, num_pages, flags, p, NULL);

		spin_lock(&gtt->guptasklock);
		list_del(&guptask.list);
		spin_unlock(&gtt->guptasklock);

		if (r < 0)
			goto release_pages;

		pinned += r;

	} while (pinned < ttm->num_pages);

	return 0;

release_pages:
	release_pages(pages, pinned, 0);
	return r;
}

/* prepare the sg table with the user pages */
static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned nents;
	int r;

	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
				      ttm->num_pages << PAGE_SHIFT,
				      GFP_KERNEL);
	if (r)
		goto release_sg;

	r = -ENOMEM;
	nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
	if (nents != ttm->sg->nents)
		goto release_sg;

	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
					 gtt->ttm.dma_address, ttm->num_pages);

	return 0;

release_sg:
	kfree(ttm->sg);
	return r;
}

/* unmap, dirty and release the user pages again */
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	struct sg_page_iter sg_iter;

	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	/* double check that we don't free the table twice */
	if (!ttm->sg->sgl)
		return;

	/* free the sg table and pages again */
	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);

	for_each_sg_page(ttm->sg->sgl, &sg_iter, ttm->sg->nents, 0) {
		struct page *page = sg_page_iter_page(&sg_iter);
		if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
			set_page_dirty(page);

		mark_page_accessed(page);
		put_page(page);
	}

	sg_free_table(ttm->sg);
}

static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
				   struct ttm_mem_reg *bo_mem)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int r;

	if (gtt->userptr) {
		r = amdgpu_ttm_tt_pin_userptr(ttm);
		if (r) {
			DRM_ERROR("failed to pin userptr\n");
			return r;
		}
	}
	if (!ttm->num_pages) {
		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
		     ttm->num_pages, bo_mem, ttm);
	}

	if (bo_mem->mem_type == AMDGPU_PL_GDS ||
	    bo_mem->mem_type == AMDGPU_PL_GWS ||
	    bo_mem->mem_type == AMDGPU_PL_OA)
		return -EINVAL;

	return 0;
}

bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	return gtt && !list_empty(&gtt->list);
}

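/*
 * amdgpu_ttm_bind - Allocate GTT space and map a BO into the GART.
 *
 * The actual GART binding is done here rather than in the backend_bind
 * callback, so the GTT manager only hands out an address once one is
 * really needed; bound ttms are tracked on adev->gtt_list.
 */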
int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
{
	struct ttm_tt *ttm = bo->ttm;
	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
	uint64_t flags;
	int r;

	if (!ttm || amdgpu_ttm_is_bound(ttm))
		return 0;

	r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo,
				 NULL, bo_mem);
	if (r) {
		DRM_ERROR("Failed to allocate GTT address space (%d)\n", r);
		return r;
	}

	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
			     ttm->pages, gtt->ttm.dma_address, flags);

	if (r) {
		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
			  ttm->num_pages, gtt->offset);
		return r;
	}
	spin_lock(&gtt->adev->gtt_list_lock);
	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
	spin_unlock(&gtt->adev->gtt_list_lock);
	return 0;
}

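/*
 * amdgpu_ttm_recover_gart - Rebind all bound BOs after the GART table
 * was lost (e.g. on GPU reset or resume).
 *
 * Every tt on adev->gtt_list is bound again at the offset it already
 * owned.
 */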
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
{
	struct amdgpu_ttm_tt *gtt, *tmp;
	struct ttm_mem_reg bo_mem;
	uint64_t flags;
	int r;

	bo_mem.mem_type = TTM_PL_TT;
	spin_lock(&adev->gtt_list_lock);
	list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) {
		flags = amdgpu_ttm_tt_pte_flags(gtt->adev, &gtt->ttm.ttm, &bo_mem);
		r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
				     gtt->ttm.ttm.pages, gtt->ttm.dma_address,
				     flags);
		if (r) {
			spin_unlock(&adev->gtt_list_lock);
			DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
				  gtt->ttm.ttm.num_pages, gtt->offset);
			return r;
		}
	}
	spin_unlock(&adev->gtt_list_lock);
	return 0;
}

static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt->userptr)
		amdgpu_ttm_tt_unpin_userptr(ttm);

	if (!amdgpu_ttm_is_bound(ttm))
		return 0;

	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
	if (gtt->adev->gart.ready)
		amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);

	spin_lock(&gtt->adev->gtt_list_lock);
	list_del_init(&gtt->list);
	spin_unlock(&gtt->adev->gtt_list_lock);

	return 0;
}

static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	ttm_dma_tt_fini(&gtt->ttm);
	kfree(gtt);
}

static struct ttm_backend_func amdgpu_backend_func = {
	.bind = &amdgpu_ttm_backend_bind,
	.unbind = &amdgpu_ttm_backend_unbind,
	.destroy = &amdgpu_ttm_backend_destroy,
};

static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
					   unsigned long size, uint32_t page_flags,
					   struct page *dummy_read_page)
{
	struct amdgpu_device *adev;
	struct amdgpu_ttm_tt *gtt;

	adev = amdgpu_ttm_adev(bdev);

	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
	if (gtt == NULL) {
		return NULL;
	}
	gtt->ttm.ttm.func = &amdgpu_backend_func;
	gtt->adev = adev;
	if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
		kfree(gtt);
		return NULL;
	}
	INIT_LIST_HEAD(&gtt->list);
	return &gtt->ttm.ttm;
}

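/*
 * amdgpu_ttm_tt_populate - Allocate and DMA-map the backing pages.
 *
 * Userptr ttms only get an empty sg_table here (the real pages are
 * pinned at bind time), dma-buf imports reuse the exporter's sg_table,
 * and everything else comes from the TTM page pool (the DMA pool when
 * swiotlb is active).
 */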
static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
{
	struct amdgpu_device *adev;
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned i;
	int r;
	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

	if (ttm->state != tt_unpopulated)
		return 0;

	if (gtt && gtt->userptr) {
		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
		if (!ttm->sg)
			return -ENOMEM;

		ttm->page_flags |= TTM_PAGE_FLAG_SG;
		ttm->state = tt_unbound;
		return 0;
	}

	if (slave && ttm->sg) {
		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
						 gtt->ttm.dma_address, ttm->num_pages);
		ttm->state = tt_unbound;
		return 0;
	}

	adev = amdgpu_ttm_adev(ttm->bdev);

#ifdef CONFIG_SWIOTLB
	if (swiotlb_nr_tbl()) {
		return ttm_dma_populate(&gtt->ttm, adev->dev);
	}
#endif

	r = ttm_pool_populate(ttm);
	if (r) {
		return r;
	}

	for (i = 0; i < ttm->num_pages; i++) {
		gtt->ttm.dma_address[i] = pci_map_page(adev->pdev, ttm->pages[i],
						       0, PAGE_SIZE,
						       PCI_DMA_BIDIRECTIONAL);
		if (pci_dma_mapping_error(adev->pdev, gtt->ttm.dma_address[i])) {
			while (i--) {
				pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i],
					       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
				gtt->ttm.dma_address[i] = 0;
			}
			ttm_pool_unpopulate(ttm);
			return -EFAULT;
		}
	}
	return 0;
}

static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
	struct amdgpu_device *adev;
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned i;
	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

	if (gtt && gtt->userptr) {
		kfree(ttm->sg);
		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
		return;
	}

	if (slave)
		return;

	adev = amdgpu_ttm_adev(ttm->bdev);

#ifdef CONFIG_SWIOTLB
	if (swiotlb_nr_tbl()) {
		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
		return;
	}
#endif

	for (i = 0; i < ttm->num_pages; i++) {
		if (gtt->ttm.dma_address[i]) {
			pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i],
				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
		}
	}

	ttm_pool_unpopulate(ttm);
}

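/*
 * amdgpu_ttm_tt_set_userptr - Turn a tt into a userptr mapping.
 *
 * Records the user address, the owning mm and the access flags, and
 * initializes the bookkeeping used by the MMU notifier.
 */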
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
			      uint32_t flags)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return -EINVAL;

	gtt->userptr = addr;
	gtt->usermm = current->mm;
	gtt->userflags = flags;
	spin_lock_init(&gtt->guptasklock);
	INIT_LIST_HEAD(&gtt->guptasks);
	atomic_set(&gtt->mmu_invalidations, 0);

	return 0;
}

struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return NULL;

	return gtt->usermm;
}

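/*
 * amdgpu_ttm_tt_affect_userptr - MMU notifier helper.
 *
 * Returns true and bumps the invalidation counter when the given range
 * overlaps this userptr BO, unless the current task is itself in the
 * middle of pinning the pages (it is on gtt->guptasks).
 */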
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
				  unsigned long end)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	struct amdgpu_ttm_gup_task_list *entry;
	unsigned long size;

	if (gtt == NULL || !gtt->userptr)
		return false;

	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
	if (gtt->userptr > end || gtt->userptr + size <= start)
		return false;

	spin_lock(&gtt->guptasklock);
	list_for_each_entry(entry, &gtt->guptasks, list) {
		if (entry->task == current) {
			spin_unlock(&gtt->guptasklock);
			return false;
		}
	}
	spin_unlock(&gtt->guptasklock);

	atomic_inc(&gtt->mmu_invalidations);

	return true;
}

bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
				       int *last_invalidated)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int prev_invalidated = *last_invalidated;

	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
	return prev_invalidated != *last_invalidated;
}

bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return false;

	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}

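/*
 * amdgpu_ttm_tt_pte_flags - Compute the GART PTE flags for a placement.
 *
 * VALID for anything resident, SYSTEM/SNOOPED for cached GTT pages,
 * plus the per-ASIC defaults and the read/write permissions of the BO.
 */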
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
				 struct ttm_mem_reg *mem)
{
	uint64_t flags = 0;

	if (mem && mem->mem_type != TTM_PL_SYSTEM)
		flags |= AMDGPU_PTE_VALID;

	if (mem && mem->mem_type == TTM_PL_TT) {
		flags |= AMDGPU_PTE_SYSTEM;

		if (ttm->caching_state == tt_cached)
			flags |= AMDGPU_PTE_SNOOPED;
	}

	flags |= adev->gart.gart_pte_flags;
	flags |= AMDGPU_PTE_READABLE;

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		flags |= AMDGPU_PTE_WRITEABLE;

	return flags;
}

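/*
 * amdgpu_ttm_bo_eviction_valuable - Decide whether evicting a BO helps.
 *
 * For BOs that don't have a valid offset yet, a GTT BO is always worth
 * evicting, while a VRAM BO is only evicted when one of its drm_mm
 * nodes actually intersects the window the allocation asked for.
 */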
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
					    const struct ttm_place *place)
{
	unsigned long num_pages = bo->mem.num_pages;
	struct drm_mm_node *node = bo->mem.mm_node;

	if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
		return ttm_bo_eviction_valuable(bo, place);

	switch (bo->mem.mem_type) {
	case TTM_PL_TT:
		return true;

	case TTM_PL_VRAM:
		/* Check each drm MM node individually */
		while (num_pages) {
			if (place->fpfn < (node->start + node->size) &&
			    !(place->lpfn && place->lpfn <= node->start))
				return true;

			num_pages -= node->size;
			++node;
		}
		break;

	default:
		break;
	}

	return ttm_bo_eviction_valuable(bo, place);
}

static struct ttm_bo_driver amdgpu_bo_driver = {
	.ttm_tt_create = &amdgpu_ttm_tt_create,
	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
	.invalidate_caches = &amdgpu_invalidate_caches,
	.init_mem_type = &amdgpu_init_mem_type,
	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
	.evict_flags = &amdgpu_evict_flags,
	.move = &amdgpu_bo_move,
	.verify_access = &amdgpu_verify_access,
	.move_notify = &amdgpu_bo_move_notify,
	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
	.io_mem_free = &amdgpu_ttm_io_mem_free,
	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
};

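/*
 * amdgpu_ttm_init - Bring up the memory manager.
 *
 * Initializes the TTM device and the VRAM, GTT, GDS, GWS and OA heaps,
 * pins the stolen VGA memory and registers the debugfs entries.
 */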
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_ttm_global_init(adev);
	if (r) {
		return r;
	}
	/* No other user of the address space, so set it to 0 */
	r = ttm_bo_device_init(&adev->mman.bdev,
			       adev->mman.bo_global_ref.ref.object,
			       &amdgpu_bo_driver,
			       adev->ddev->anon_inode->i_mapping,
			       DRM_FILE_PAGE_OFFSET,
			       adev->need_dma32);
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
		return r;
	}
	adev->mman.initialized = true;
	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
			   adev->mc.real_vram_size >> PAGE_SHIFT);
	if (r) {
		DRM_ERROR("Failed initializing VRAM heap.\n");
		return r;
	}
	/* Change the size here instead of the init above so only lpfn is affected */
	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);

	r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
			     NULL, NULL, &adev->stollen_vga_memory);
	if (r) {
		return r;
	}
	r = amdgpu_bo_reserve(adev->stollen_vga_memory, false);
	if (r)
		return r;
	r = amdgpu_bo_pin(adev->stollen_vga_memory, AMDGPU_GEM_DOMAIN_VRAM, NULL);
	amdgpu_bo_unreserve(adev->stollen_vga_memory);
	if (r) {
		amdgpu_bo_unref(&adev->stollen_vga_memory);
		return r;
	}
	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
		 (unsigned) (adev->mc.real_vram_size / (1024 * 1024)));
	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT,
			   adev->mc.gtt_size >> PAGE_SHIFT);
	if (r) {
		DRM_ERROR("Failed initializing GTT heap.\n");
		return r;
	}
	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
		 (unsigned)(adev->mc.gtt_size / (1024 * 1024)));

	adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
	adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
	adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
	adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
	adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
	adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
	adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
	adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
	adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
	/* GDS Memory */
	if (adev->gds.mem.total_size) {
		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
				   adev->gds.mem.total_size >> PAGE_SHIFT);
		if (r) {
			DRM_ERROR("Failed initializing GDS heap.\n");
			return r;
		}
	}

	/* GWS */
	if (adev->gds.gws.total_size) {
		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
				   adev->gds.gws.total_size >> PAGE_SHIFT);
		if (r) {
			DRM_ERROR("Failed initializing gws heap.\n");
			return r;
		}
	}

	/* OA */
	if (adev->gds.oa.total_size) {
		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
				   adev->gds.oa.total_size >> PAGE_SHIFT);
		if (r) {
			DRM_ERROR("Failed initializing oa heap.\n");
			return r;
		}
	}

	r = amdgpu_ttm_debugfs_init(adev);
	if (r) {
		DRM_ERROR("Failed to init debugfs\n");
		return r;
	}
	return 0;
}

void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
	int r;

	if (!adev->mman.initialized)
		return;
	amdgpu_ttm_debugfs_fini(adev);
	if (adev->stollen_vga_memory) {
		r = amdgpu_bo_reserve(adev->stollen_vga_memory, true);
		if (r == 0) {
			amdgpu_bo_unpin(adev->stollen_vga_memory);
			amdgpu_bo_unreserve(adev->stollen_vga_memory);
		}
		amdgpu_bo_unref(&adev->stollen_vga_memory);
	}
	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
	if (adev->gds.mem.total_size)
		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
	if (adev->gds.gws.total_size)
		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
	if (adev->gds.oa.total_size)
		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
	ttm_bo_device_release(&adev->mman.bdev);
	amdgpu_gart_fini(adev);
	amdgpu_ttm_global_fini(adev);
	adev->mman.initialized = false;
	DRM_INFO("amdgpu: ttm finalized\n");
}

/* this should only be called at bootup or when userspace
 * isn't running */
void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
{
	struct ttm_mem_type_manager *man;

	if (!adev->mman.initialized)
		return;

	man = &adev->mman.bdev.man[TTM_PL_VRAM];
	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
	man->size = size >> PAGE_SHIFT;
}

int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct drm_file *file_priv;
	struct amdgpu_device *adev;

	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
		return -EINVAL;

	file_priv = filp->private_data;
	adev = file_priv->minor->dev->dev_private;
	if (adev == NULL)
		return -EINVAL;

	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
}

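/*
 * amdgpu_copy_buffer - Schedule a GPU copy between two GPU addresses.
 *
 * Builds a single IB with one copy packet per copy_max_bytes chunk,
 * optionally syncing to the fences in @resv first. With direct_submit
 * the IB bypasses the scheduler; otherwise it is submitted through
 * adev->mman.entity and *fence signals on completion.
 */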
int amdgpu_copy_buffer(struct amdgpu_ring *ring,
		       uint64_t src_offset,
		       uint64_t dst_offset,
		       uint32_t byte_count,
		       struct reservation_object *resv,
		       struct dma_fence **fence, bool direct_submit)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;

	uint32_t max_bytes;
	unsigned num_loops, num_dw;
	unsigned i;
	int r;

	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;

	/* for IB padding */
	while (num_dw & 0x7)
		num_dw++;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
	if (r)
		return r;

	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

	for (i = 0; i < num_loops; i++) {
		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
					dst_offset, cur_size_in_bytes);

		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
		byte_count -= cur_size_in_bytes;
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	if (direct_submit) {
		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
				       NULL, fence);
		job->fence = dma_fence_get(*fence);
		if (r)
			DRM_ERROR("Error scheduling IBs (%d)\n", r);
		amdgpu_job_free(job);
	} else {
		r = amdgpu_job_submit(job, ring, &adev->mman.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
		if (r)
			goto error_free;
	}

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}

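/*
 * amdgpu_fill_buffer - Fill a BO with a 32-bit pattern using the GPU.
 *
 * Sizes one job for all drm_mm nodes of the BO, then emits a fill
 * packet per fill_max_bytes chunk of each node.
 */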
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
		       uint32_t src_data,
		       struct reservation_object *resv,
		       struct dma_fence **fence)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;

	struct drm_mm_node *mm_node;
	unsigned long num_pages;
	unsigned int num_loops, num_dw;

	struct amdgpu_job *job;
	int r;

	if (!ring->ready) {
		DRM_ERROR("Trying to clear memory with ring turned off.\n");
		return -EINVAL;
	}

	num_pages = bo->tbo.num_pages;
	mm_node = bo->tbo.mem.mm_node;
	num_loops = 0;
	while (num_pages) {
		uint32_t byte_count = mm_node->size << PAGE_SHIFT;

		num_loops += DIV_ROUND_UP(byte_count, max_bytes);
		num_pages -= mm_node->size;
		++mm_node;
	}
	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;

	/* for IB padding */
	num_dw += 64;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
	if (r)
		return r;

	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

	num_pages = bo->tbo.num_pages;
	mm_node = bo->tbo.mem.mm_node;

	while (num_pages) {
		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
		uint64_t dst_addr;

		r = amdgpu_mm_node_addr(&bo->tbo, mm_node,
					&bo->tbo.mem, &dst_addr);
		if (r)
			goto error_free;

		while (byte_count) {
			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

			amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
						dst_addr, cur_size_in_bytes);

			dst_addr += cur_size_in_bytes;
			byte_count -= cur_size_in_bytes;
		}

		num_pages -= mm_node->size;
		++mm_node;
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
	if (r)
		goto error_free;

	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}

#if defined(CONFIG_DEBUG_FS)

extern void amdgpu_gtt_mgr_print(struct seq_file *m,
				 struct ttm_mem_type_manager *man);
static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	unsigned ttm_pl = *(int *)node->info_ent->data;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv;
	struct ttm_bo_global *glob = adev->mman.bdev.glob;
	struct drm_printer p = drm_seq_file_printer(m);

	spin_lock(&glob->lru_lock);
	drm_mm_print(mm, &p);
	spin_unlock(&glob->lru_lock);
	switch (ttm_pl) {
	case TTM_PL_VRAM:
		seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
			   adev->mman.bdev.man[ttm_pl].size,
			   (u64)atomic64_read(&adev->vram_usage) >> 20,
			   (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
		break;
	case TTM_PL_TT:
		amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]);
		break;
	}
	return 0;
}

static int ttm_pl_vram = TTM_PL_VRAM;
static int ttm_pl_tt = TTM_PL_TT;

static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
	{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
	{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
#ifdef CONFIG_SWIOTLB
	{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
#endif
};

static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
				    size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	while (size) {
		unsigned long flags;
		uint32_t value;

		if (*pos >= adev->mc.mc_vram_size)
			return result;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		WREG32(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
		WREG32(mmMM_INDEX_HI, *pos >> 31);
		value = RREG32(mmMM_DATA);
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);

		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

static const struct file_operations amdgpu_ttm_vram_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ttm_vram_read,
	.llseek = default_llseek
};

#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS

static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
				   size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	while (size) {
		loff_t p = *pos / PAGE_SIZE;
		unsigned off = *pos & ~PAGE_MASK;
		size_t cur_size = min_t(size_t, size, PAGE_SIZE - off);
		struct page *page;
		void *ptr;

		if (p >= adev->gart.num_cpu_pages)
			return result;

		page = adev->gart.pages[p];
		if (page) {
			ptr = kmap(page);
			ptr += off;

			r = copy_to_user(buf, ptr, cur_size);
			kunmap(adev->gart.pages[p]);
		} else
			r = clear_user(buf, cur_size);

		if (r)
			return -EFAULT;

		result += cur_size;
		buf += cur_size;
		*pos += cur_size;
		size -= cur_size;
	}

	return result;
}

static const struct file_operations amdgpu_ttm_gtt_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ttm_gtt_read,
	.llseek = default_llseek
};

#endif

#endif

static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	unsigned count;

	struct drm_minor *minor = adev->ddev->primary;
	struct dentry *ent, *root = minor->debugfs_root;

	ent = debugfs_create_file("amdgpu_vram", S_IFREG | S_IRUGO, root,
				  adev, &amdgpu_ttm_vram_fops);
	if (IS_ERR(ent))
		return PTR_ERR(ent);
	i_size_write(ent->d_inode, adev->mc.mc_vram_size);
	adev->mman.vram = ent;

#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
	ent = debugfs_create_file("amdgpu_gtt", S_IFREG | S_IRUGO, root,
				  adev, &amdgpu_ttm_gtt_fops);
	if (IS_ERR(ent))
		return PTR_ERR(ent);
	i_size_write(ent->d_inode, adev->mc.gtt_size);
	adev->mman.gtt = ent;

#endif
	count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);

#ifdef CONFIG_SWIOTLB
	if (!swiotlb_nr_tbl())
		--count;
#endif

	return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
#else

	return 0;
#endif
}

static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)

	debugfs_remove(adev->mman.vram);
	adev->mman.vram = NULL;

#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
	debugfs_remove(adev->mman.gtt);
	adev->mman.gtt = NULL;
#endif

#endif
}