// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_vm.h"

/*
 * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt_dfs_safe
 */
struct amdgpu_vm_pt_cursor {
	uint64_t pfn;
	struct amdgpu_vm_bo_base *parent;
	struct amdgpu_vm_bo_base *entry;
	unsigned int level;
};

/**
 * amdgpu_vm_pt_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of bits the pfn needs to be right shifted for a level.
 */
static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
					     unsigned int level)
{
	switch (level) {
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		return 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
	case AMDGPU_VM_PTB:
		return 0;
	default:
		return ~0;
	}
}
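
/*
 * Worked example (illustrative, assuming the common 9-bit block_size):
 *   AMDGPU_VM_PDB2: 9 * (AMDGPU_VM_PDB0 - AMDGPU_VM_PDB2) + 9 = 27
 *   AMDGPU_VM_PDB1: 9 * 1 + 9 = 18
 *   AMDGPU_VM_PDB0: 9 * 0 + 9 = 9
 *   AMDGPU_VM_PTB:  0
 * A pfn is thus split into 9-bit directory indices per level, with the low
 * block_size bits selecting the PTE inside the leaf page table.
 */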

/**
 * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of entries in a page directory or page table.
 */
static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
					     unsigned int level)
{
	unsigned int shift;

	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
			>> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;

	/* For the page tables on the leaves */
	return AMDGPU_VM_PTE_COUNT(adev);
}
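
/*
 * Example (illustrative, assuming a 48-bit address space and a 9-bit
 * block_size): max_pfn is 1ULL << 36 and the root (PDB2) shift is 27, so
 * the root PD holds (1ULL << 36) >> 27 = 512 entries; intermediate levels
 * are fixed at 512 and the leaf PTB holds AMDGPU_VM_PTE_COUNT(adev) = 512
 * entries as well.
 */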

/**
 * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
 *
 * @adev: amdgpu_device pointer
 *
 * Returns:
 * The number of entries in the root page directory which need the ATS setting.
 */
static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
{
	unsigned int shift;

	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
	return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
}
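
/*
 * Example (illustrative, assuming AMDGPU_GMC_HOLE_START = 1ULL << 47 and a
 * root shift of 27): with AMDGPU_GPU_PAGE_SHIFT = 12 this yields
 * (1ULL << 47) >> 39 = 256 entries, i.e. the lower half of a 512-entry
 * root PD covers the ATS range below the hole.
 */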

/**
 * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The mask to extract the entry number of a PD/PT from an address.
 */
static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
					  unsigned int level)
{
	if (level <= adev->vm_manager.root_level)
		return 0xffffffff;
	else if (level != AMDGPU_VM_PTB)
		return 0x1ff;
	else
		return AMDGPU_VM_PTE_COUNT(adev) - 1;
}

/**
 * amdgpu_vm_pt_size - returns the size of the page table in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The size of the BO for a page directory or page table in bytes.
 */
static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
				      unsigned int level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
}
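
/*
 * Example: a 512-entry directory or table needs 512 * 8 = 4096 bytes, so
 * each PD/PT BO is typically exactly one 4 KiB GPU page.
 */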

/**
 * amdgpu_vm_pt_parent - get the parent page directory
 *
 * @pt: child page table
 *
 * Helper to get the parent entry for the child page table. NULL if we are at
 * the root page directory.
 */
static struct amdgpu_vm_bo_base *
amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
{
	struct amdgpu_bo *parent = pt->bo->parent;

	if (!parent)
		return NULL;

	return parent->vm_bo;
}

/**
 * amdgpu_vm_pt_start - start PD/PT walk
 *
 * @adev: amdgpu_device pointer
 * @vm: amdgpu_vm structure
 * @start: start address of the walk
 * @cursor: state to initialize
 *
 * Initialize an amdgpu_vm_pt_cursor to start a walk.
 */
static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm, uint64_t start,
			       struct amdgpu_vm_pt_cursor *cursor)
{
	cursor->pfn = start;
	cursor->parent = NULL;
	cursor->entry = &vm->root;
	cursor->level = adev->vm_manager.root_level;
}

/**
 * amdgpu_vm_pt_descendant - go to child node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the child node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
				    struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int mask, shift, idx;

	if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
	    !cursor->entry->bo)
		return false;

	mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level);

	++cursor->level;
	idx = (cursor->pfn >> shift) & mask;
	cursor->parent = cursor->entry;
	cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
	return true;
}
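
/*
 * Example (illustrative): descending from AMDGPU_VM_PDB0 with a 9-bit
 * block_size gives shift = 9 and mask = 0x1ff, so for cursor->pfn = 0x12345
 * the child index is (0x12345 >> 9) & 0x1ff = 0x91, i.e. the walk continues
 * at entry 0x91 of the current directory.
 */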

/**
 * amdgpu_vm_pt_sibling - go to sibling node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the sibling node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
				 struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int shift, num_entries;
	struct amdgpu_bo_vm *parent;

	/* Root doesn't have a sibling */
	if (!cursor->parent)
		return false;

	/* Go to our parents and see if we got a sibling */
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
	num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
	parent = to_amdgpu_bo_vm(cursor->parent->bo);

	if (cursor->entry == &parent->entries[num_entries - 1])
		return false;

	cursor->pfn += 1ULL << shift;
	cursor->pfn &= ~((1ULL << shift) - 1);
	++cursor->entry;
	return true;
}
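
/*
 * Example: for a PTB cursor under a PDB0 (parent shift 9), a cursor at
 * pfn 0x123 is advanced by 0x200 and then masked down to 0x200, i.e. the
 * start of the sibling's 512-page range.
 */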

/**
 * amdgpu_vm_pt_ancestor - go to parent node
 *
 * @cursor: current state
 *
 * Walk to the parent node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->parent)
		return false;

	--cursor->level;
	cursor->entry = cursor->parent;
	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
	return true;
}

/**
 * amdgpu_vm_pt_next - get next PD/PT in hierarchy
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk the PD/PT tree to the next node.
 */
static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
			      struct amdgpu_vm_pt_cursor *cursor)
{
	/* First try a newborn child */
	if (amdgpu_vm_pt_descendant(adev, cursor))
		return;

	/* If that didn't work, try to find a sibling */
	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
		/* No sibling, go to our parents and grandparents */
		if (!amdgpu_vm_pt_ancestor(cursor)) {
			cursor->pfn = ~0ll;
			return;
		}
	}
}

/**
 * amdgpu_vm_pt_first_dfs - start a depth-first search
 *
 * @adev: amdgpu_device structure
 * @vm: amdgpu_vm structure
 * @start: optional cursor to start with
 * @cursor: state to initialize
 *
 * Starts a depth-first traversal of the PD/PT tree.
 */
static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct amdgpu_vm_pt_cursor *start,
				   struct amdgpu_vm_pt_cursor *cursor)
{
	if (start)
		*cursor = *start;
	else
		amdgpu_vm_pt_start(adev, vm, 0, cursor);

	while (amdgpu_vm_pt_descendant(adev, cursor))
		;
}

/**
 * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
 *
 * @start: starting point for the search
 * @entry: current entry
 *
 * Returns:
 * True when the search should continue, false otherwise.
 */
static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
				      struct amdgpu_vm_bo_base *entry)
{
	return entry && (!start || entry != start->entry);
}

/**
 * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
 *
 * @adev: amdgpu_device structure
 * @cursor: current state
 *
 * Move the cursor to the next node in a depth-first search.
 */
static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->entry)
		return;

	if (!cursor->parent)
		cursor->entry = NULL;
	else if (amdgpu_vm_pt_sibling(adev, cursor))
		while (amdgpu_vm_pt_descendant(adev, cursor))
			;
	else
		amdgpu_vm_pt_ancestor(cursor);
}

/*
 * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
 */
#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)	\
	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),	\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
	     amdgpu_vm_pt_continue_dfs((start), (entry));		\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
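
/*
 * Usage sketch (mirrors amdgpu_vm_pt_free_dfs() below): the walk visits
 * children before their parents and keeps the cursor one step ahead of
 * @entry, so the current entry can be freed or moved off a list safely:
 *
 *	struct amdgpu_vm_pt_cursor cursor;
 *	struct amdgpu_vm_bo_base *entry;
 *
 *	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
 *		amdgpu_vm_pt_free(entry);
 */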

/**
 * amdgpu_vm_pt_clear - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @vmbo: BO to clear
 * @immediate: use an immediate update
 *
 * Root PD needs to be reserved when calling this.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		       struct amdgpu_bo_vm *vmbo, bool immediate)
{
	unsigned int level = adev->vm_manager.root_level;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm_update_params params;
	struct amdgpu_bo *ancestor = &vmbo->bo;
	unsigned int entries, ats_entries;
	struct amdgpu_bo *bo = &vmbo->bo;
	uint64_t addr;
	int r, idx;

	/* Figure out our place in the hierarchy */
	if (ancestor->parent) {
		++level;
		while (ancestor->parent->parent) {
			++level;
			ancestor = ancestor->parent;
		}
	}

	entries = amdgpu_bo_size(bo) / 8;
	if (!vm->pte_support_ats) {
		ats_entries = 0;

	} else if (!bo->parent) {
		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
		ats_entries = min(ats_entries, entries);
		entries -= ats_entries;

	} else {
		struct amdgpu_vm_bo_base *pt;

		pt = ancestor->vm_bo;
		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
		if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
		    ats_entries) {
			ats_entries = 0;
		} else {
			ats_entries = entries;
			entries = 0;
		}
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		return r;

	if (vmbo->shadow) {
		struct amdgpu_bo *shadow = vmbo->shadow;

		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
		if (r)
			return r;
	}

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return -ENODEV;

	r = vm->update_funcs->map_table(vmbo);
	if (r)
		goto exit;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;

	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
	if (r)
		goto exit;

	addr = 0;
	if (ats_entries) {
		uint64_t value = 0, flags;

		flags = AMDGPU_PTE_DEFAULT_ATC;
		if (level != AMDGPU_VM_PTB) {
			/* Handle leaf PDEs as PTEs */
			flags |= AMDGPU_PDE_PTE;
			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
		}

		r = vm->update_funcs->update(&params, vmbo, addr, 0,
					     ats_entries, value, flags);
		if (r)
			goto exit;

		addr += ats_entries * 8;
	}

	if (entries) {
		uint64_t value = 0, flags = 0;

		if (adev->asic_type >= CHIP_VEGA10) {
			if (level != AMDGPU_VM_PTB) {
				/* Handle leaf PDEs as PTEs */
				flags |= AMDGPU_PDE_PTE;
				amdgpu_gmc_get_vm_pde(adev, level,
						      &value, &flags);
			} else {
				/* Workaround for fault priority problem on GMC9 */
				flags = AMDGPU_PTE_EXECUTABLE;
			}
		}

		r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
					     value, flags);
		if (r)
			goto exit;
	}

	r = vm->update_funcs->commit(&params, NULL);
exit:
	drm_dev_exit(idx);
	return r;
}

/**
 * amdgpu_vm_pt_create - create bo for PD/PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requesting vm
 * @level: the page table level
 * @immediate: use an immediate update
 * @vmbo: pointer to the buffer object pointer
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			int level, bool immediate, struct amdgpu_bo_vm **vmbo)
{
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	struct dma_resv *resv;
	unsigned int num_entries;
	int r;

	memset(&bp, 0, sizeof(bp));

	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_CPU_GTT_USWC;

	if (level < AMDGPU_VM_PTB)
		num_entries = amdgpu_vm_pt_num_entries(adev, level);
	else
		num_entries = 0;

	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);

	if (vm->use_cpu_for_update)
		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	bp.type = ttm_bo_type_kernel;
	bp.no_wait_gpu = immediate;
	if (vm->root.bo)
		bp.resv = vm->root.bo->tbo.base.resv;

	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
	if (r)
		return r;

	bo = &(*vmbo)->bo;
	if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
		(*vmbo)->shadow = NULL;
		return 0;
	}

	if (!bp.resv)
		WARN_ON(dma_resv_lock(bo->tbo.base.resv, NULL));
	resv = bp.resv;
	memset(&bp, 0, sizeof(bp));
	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = bo->tbo.base.resv;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);

	if (!resv)
		dma_resv_unlock(bo->tbo.base.resv);

	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	(*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
	amdgpu_bo_add_to_shadow_list(*vmbo);

	return 0;
}

/**
 * amdgpu_vm_pt_alloc - Allocate a specific page table
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @cursor: Which page table to allocate
 * @immediate: use an immediate update
 *
 * Make sure a specific page table or directory is allocated.
 *
 * Returns:
 * 0 if the page table was allocated or was already present, negative errno
 * if an error occurred.
 */
static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm,
			      struct amdgpu_vm_pt_cursor *cursor,
			      bool immediate)
{
	struct amdgpu_vm_bo_base *entry = cursor->entry;
	struct amdgpu_bo *pt_bo;
	struct amdgpu_bo_vm *pt;
	int r;

	if (entry->bo)
		return 0;

	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
	if (r)
		return r;

	/* Keep a reference to the parent BO to avoid freeing the
	 * directories in the wrong order.
	 */
	pt_bo = &pt->bo;
	pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
	amdgpu_vm_bo_base_init(entry, vm, pt_bo);
	r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
	if (r)
		goto error_free_pt;

	return 0;

error_free_pt:
	amdgpu_bo_unref(&pt->shadow);
	amdgpu_bo_unref(&pt_bo);
	return r;
}

/**
 * amdgpu_vm_pt_free - free one PD/PT
 *
 * @entry: PDE to free
 */
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_bo *shadow;

	if (!entry->bo)
		return;
	shadow = amdgpu_bo_shadowed(entry->bo);
	if (shadow) {
		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
		amdgpu_bo_unref(&shadow);
	}
	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
	entry->bo->vm_bo = NULL;

	spin_lock(&entry->vm->status_lock);
	list_del(&entry->vm_status);
	spin_unlock(&entry->vm->status_lock);
	amdgpu_bo_unref(&entry->bo);
}

void amdgpu_vm_pt_free_work(struct work_struct *work)
{
	struct amdgpu_vm_bo_base *entry, *next;
	struct amdgpu_vm *vm;
	LIST_HEAD(pt_freed);

	vm = container_of(work, struct amdgpu_vm, pt_free_work);

	spin_lock(&vm->status_lock);
	list_splice_init(&vm->pt_freed, &pt_freed);
	spin_unlock(&vm->status_lock);

	/* flush_work in amdgpu_vm_fini ensures vm->root.bo is valid. */
	amdgpu_bo_reserve(vm->root.bo, true);

	list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
		amdgpu_vm_pt_free(entry);

	amdgpu_bo_unreserve(vm->root.bo);
}

/**
 * amdgpu_vm_pt_free_dfs - free PD/PT levels
 *
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 * @start: optional cursor where to start freeing PDs/PTs
 * @unlocked: if true, queue the PDs/PTs on vm->pt_freed and let a worker
 *	free them instead of freeing them here
 *
 * Free the page directory or page table level and all sub levels.
 */
static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt_cursor *start,
				  bool unlocked)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	if (unlocked) {
		spin_lock(&vm->status_lock);
		for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
			list_move(&entry->vm_status, &vm->pt_freed);

		if (start)
			list_move(&start->entry->vm_status, &vm->pt_freed);
		spin_unlock(&vm->status_lock);
		schedule_work(&vm->pt_free_work);
		return;
	}

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
		amdgpu_vm_pt_free(entry);

	if (start)
		amdgpu_vm_pt_free(start->entry);
}

/**
 * amdgpu_vm_pt_free_root - free root PD
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * Free the root page directory and everything below it.
 */
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
}

/**
 * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
 *
 * @adev: amdgpu_device pointer
 * @vm: the VM to check
 *
 * Check all entries of the root PD. If any subsequent PDs are allocated,
 * page tables are being created and filled in, so the VM is not clean.
 *
 * Returns:
 * True if this VM is clean, false otherwise.
 */
bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
				struct amdgpu_vm *vm)
{
	enum amdgpu_vm_level root = adev->vm_manager.root_level;
	unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
	unsigned int i = 0;

	for (i = 0; i < entries; i++) {
		if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
			return false;
	}
	return true;
}

/**
 * amdgpu_vm_pde_update - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
			 struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
	struct amdgpu_bo *bo = parent->bo, *pbo;
	struct amdgpu_vm *vm = params->vm;
	uint64_t pde, pt, flags;
	unsigned int level;

	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

	level += params->adev->vm_manager.root_level;
	amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
	pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
	return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
					1, 0, flags);
}
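
/*
 * Example: if @entry sits in slot 5 of its parent directory, pde is
 * 5 * 8 = 40, i.e. the update rewrites the single 8-byte PDE at byte
 * offset 40 of the parent BO.
 */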

/*
 * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
 *
 * Make sure to set the right flags for the PTEs at the desired level.
 */
static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
				       struct amdgpu_bo_vm *pt,
				       unsigned int level,
				       uint64_t pe, uint64_t addr,
				       unsigned int count, uint32_t incr,
				       uint64_t flags)
{
	if (level != AMDGPU_VM_PTB) {
		flags |= AMDGPU_PDE_PTE;
		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);

	} else if (params->adev->asic_type >= CHIP_VEGA10 &&
		   !(flags & AMDGPU_PTE_VALID) &&
		   !(flags & AMDGPU_PTE_PRT)) {

		/* Workaround for fault priority problem on GMC9 */
		flags |= AMDGPU_PTE_EXECUTABLE;
	}

	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
					 flags);
}

/**
 * amdgpu_vm_pte_fragment - get fragment for PTEs
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @flags: hw mapping flags
 * @frag: resulting fragment size
 * @frag_end: end of this fragment
 *
 * Returns the first possible fragment for the start and end address.
 */
static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
				   uint64_t start, uint64_t end, uint64_t flags,
				   unsigned int *frag, uint64_t *frag_end)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning the virtual base address
	 * and allocation size to the fragment size.
	 *
	 * Starting with Vega10 the fragment size only controls the L1. The
	 * L2 is now directly fed with small/huge/giant pages from the walker.
	 */
	unsigned int max_frag;

	if (params->adev->asic_type < CHIP_VEGA10)
		max_frag = params->adev->vm_manager.fragment_size;
	else
		max_frag = 31;

	/* system pages are not physically contiguous */
	if (params->pages_addr) {
		*frag = 0;
		*frag_end = end;
		return;
	}

	/* This intentionally wraps around if no bit is set */
	*frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
	if (*frag >= max_frag) {
		*frag = max_frag;
		*frag_end = end & ~((1ULL << max_frag) - 1);
	} else {
		*frag_end = start + (1 << *frag);
	}
}
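
/*
 * Worked example (illustrative, on Vega10+ so max_frag = 31, no system
 * pages): for start = 0x400 and end = 0x800, ffs(0x400) - 1 = 10 and
 * fls64(end - start) - 1 = 10, so *frag = 10 and *frag_end = 0x400 +
 * (1 << 10) = 0x800, i.e. the whole range is covered by one fragment of
 * 2^10 pages (4 MiB with 4 KiB pages).
 */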

/**
 * amdgpu_vm_ptes_update - make sure that page tables are valid
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to; advanced as PTEs are written inside
 *	the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 *
 * Returns:
 * 0 for success, negative errno for failure.
 */
int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
			  uint64_t start, uint64_t end,
			  uint64_t dst, uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;
	struct amdgpu_vm_pt_cursor cursor;
	uint64_t frag_start = start, frag_end;
	unsigned int frag;
	int r;

	/* figure out the initial fragment */
	amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
			       &frag_end);

	/* walk over the address space and update the PTs */
	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
	while (cursor.pfn < end) {
		unsigned int shift, parent_shift, mask;
		uint64_t incr, entry_end, pe_start;
		struct amdgpu_bo *pt;

		if (!params->unlocked) {
			/* make sure that the page tables covering the
			 * address range are actually allocated
			 */
			r = amdgpu_vm_pt_alloc(params->adev, params->vm,
					       &cursor, params->immediate);
			if (r)
				return r;
		}

		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
		if (params->unlocked) {
			/* Unlocked updates are only allowed on the leaves */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (adev->asic_type < CHIP_VEGA10 &&
			   (flags & AMDGPU_PTE_VALID)) {
			/* No huge page support before GMC v9 */
			if (cursor.level != AMDGPU_VM_PTB) {
				if (!amdgpu_vm_pt_descendant(adev, &cursor))
					return -ENOENT;
				continue;
			}
		} else if (frag < shift) {
			/* We can't use this level when the fragment size is
			 * smaller than the address shift. Go to the next
			 * child entry and try again.
			 */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (frag >= parent_shift) {
			/* If the fragment size is even larger than the parent
			 * shift we should go up one level and check it again.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;
			continue;
		}

		pt = cursor.entry->bo;
		if (!pt) {
			/* We need all PDs and PTs for mapping something, */
			if (flags & AMDGPU_PTE_VALID)
				return -ENOENT;

			/* but unmapping something can happen at a higher
			 * level.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;

			pt = cursor.entry->bo;
			shift = parent_shift;
			frag_end = max(frag_end, ALIGN(frag_start + 1,
						       1ULL << shift));
		}

		/* Looks good so far, calculate parameters for the update */
		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
		pe_start = ((cursor.pfn >> shift) & mask) * 8;
		entry_end = ((uint64_t)mask + 1) << shift;
		entry_end += cursor.pfn & ~(entry_end - 1);
		entry_end = min(entry_end, end);

		do {
			struct amdgpu_vm *vm = params->vm;
			uint64_t upd_end = min(entry_end, frag_end);
			unsigned int nptes = (upd_end - frag_start) >> shift;
			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);

			/* This can happen when we set higher level PDs to
			 * silent to stop fault floods.
			 */
			nptes = max(nptes, 1u);

			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
						    min(nptes, 32u), dst, incr,
						    upd_flags,
						    vm->task_info.pid,
						    vm->immediate.fence_context);
			amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
						   cursor.level, pe_start, dst,
						   nptes, incr, upd_flags);

			pe_start += nptes * 8;
			dst += nptes * incr;

			frag_start = upd_end;
			if (frag_start >= frag_end) {
				/* figure out the next fragment */
				amdgpu_vm_pte_fragment(params, frag_start, end,
						       flags, &frag, &frag_end);
				if (frag < shift)
					break;
			}
		} while (frag_start < entry_end);

		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
			/* Free all child entries.
			 * Update the tables with the flags and addresses and
			 * free up subsequent tables in the case of huge pages
			 * or freed up areas. This is the maximum you can free,
			 * because all other page tables are not completely
			 * covered by the range and so potentially still in use.
			 */
			while (cursor.pfn < frag_start) {
				/* Make sure previous mapping is freed */
				if (cursor.entry->bo) {
					params->table_freed = true;
					amdgpu_vm_pt_free_dfs(adev, params->vm,
							      &cursor,
							      params->unlocked);
				}
				amdgpu_vm_pt_next(adev, &cursor);
			}

		} else if (frag >= shift) {
			/* or just move on to the next on the same level. */
			amdgpu_vm_pt_next(adev, &cursor);
		}
	}

	return 0;
}