Commit | Line | Data |
---|---|---|
19f6d2a6 OG |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | ||
23 | #include <linux/mutex.h> | |
24 | #include <linux/log2.h> | |
25 | #include <linux/sched.h> | |
6e84f315 | 26 | #include <linux/sched/mm.h> |
c7b1243e | 27 | #include <linux/sched/task.h> |
19f6d2a6 | 28 | #include <linux/slab.h> |
b17f068a | 29 | #include <linux/amd-iommu.h> |
19f6d2a6 | 30 | #include <linux/notifier.h> |
dd59239a | 31 | #include <linux/compat.h> |
373d7080 | 32 | #include <linux/mman.h> |
b84394e2 | 33 | #include <linux/file.h> |
5b87245f | 34 | #include "amdgpu_amdkfd.h" |
dd59239a | 35 | |
19f6d2a6 OG |
36 | struct mm_struct; |
37 | ||
38 | #include "kfd_priv.h" | |
403575c4 | 39 | #include "kfd_device_queue_manager.h" |
c3447e81 | 40 | #include "kfd_dbgmgr.h" |
64d1c3a4 | 41 | #include "kfd_iommu.h" |
19f6d2a6 | 42 | |
19f6d2a6 OG |
43 | /* |
44 | * List of struct kfd_process (field kfd_process). | |
45 | * Unique/indexed by mm_struct* | |
46 | */ | |
64d1c3a4 | 47 | DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); |
19f6d2a6 OG |
48 | static DEFINE_MUTEX(kfd_processes_mutex); |
49 | ||
64d1c3a4 | 50 | DEFINE_SRCU(kfd_processes_srcu); |
19f6d2a6 | 51 | |
1679ae8f | 52 | /* For process termination handling */ |
19f6d2a6 OG |
53 | static struct workqueue_struct *kfd_process_wq; |
54 | ||
1679ae8f FK |
55 | /* Ordered, single-threaded workqueue for restoring evicted |
56 | * processes. Restoring multiple processes concurrently under memory | |
57 | * pressure can lead to processes blocking each other from validating | |
58 | * their BOs and result in a live-lock situation where processes | |
59 | * remain evicted indefinitely. | |
60 | */ | |
61 | static struct workqueue_struct *kfd_restore_wq; | |
62 | ||
19f6d2a6 | 63 | static struct kfd_process *find_process(const struct task_struct *thread); |
abb208a8 | 64 | static void kfd_process_ref_release(struct kref *ref); |
c0ede1f8 YZ |
65 | static struct kfd_process *create_process(const struct task_struct *thread, |
66 | struct file *filep); | |
373d7080 | 67 | |
26103436 FK |
68 | static void evict_process_worker(struct work_struct *work); |
69 | static void restore_process_worker(struct work_struct *work); | |
70 | ||
de9f26bb KR |
/* Anchor for the KFD per-process sysfs hierarchy. */
struct kfd_procfs_tree {
	/* kobject registered under the name "proc" by kfd_procfs_init() */
	struct kobject *kobj;
};

/* The single, file-local instance of the hierarchy root. */
static struct kfd_procfs_tree procfs;
76 | ||
77 | static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, | |
78 | char *buffer) | |
79 | { | |
80 | int val = 0; | |
81 | ||
82 | if (strcmp(attr->name, "pasid") == 0) { | |
83 | struct kfd_process *p = container_of(attr, struct kfd_process, | |
84 | attr_pasid); | |
85 | val = p->pasid; | |
86 | } else { | |
87 | pr_err("Invalid attribute"); | |
88 | return -EINVAL; | |
89 | } | |
90 | ||
91 | return snprintf(buffer, PAGE_SIZE, "%d\n", val); | |
92 | } | |
93 | ||
/* kobj_type ->release() hook: frees the kobject's own memory. */
static void kfd_procfs_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}
98 | ||
99 | static const struct sysfs_ops kfd_procfs_ops = { | |
100 | .show = kfd_procfs_show, | |
101 | }; | |
102 | ||
103 | static struct kobj_type procfs_type = { | |
104 | .release = kfd_procfs_kobj_release, | |
105 | .sysfs_ops = &kfd_procfs_ops, | |
106 | }; | |
107 | ||
108 | void kfd_procfs_init(void) | |
109 | { | |
110 | int ret = 0; | |
111 | ||
112 | procfs.kobj = kfd_alloc_struct(procfs.kobj); | |
113 | if (!procfs.kobj) | |
114 | return; | |
115 | ||
116 | ret = kobject_init_and_add(procfs.kobj, &procfs_type, | |
117 | &kfd_device->kobj, "proc"); | |
118 | if (ret) { | |
119 | pr_warn("Could not create procfs proc folder"); | |
120 | /* If we fail to create the procfs, clean up */ | |
121 | kfd_procfs_shutdown(); | |
122 | } | |
123 | } | |
124 | ||
125 | void kfd_procfs_shutdown(void) | |
126 | { | |
127 | if (procfs.kobj) { | |
128 | kobject_del(procfs.kobj); | |
129 | kobject_put(procfs.kobj); | |
130 | procfs.kobj = NULL; | |
131 | } | |
132 | } | |
19f6d2a6 | 133 | |
1679ae8f | 134 | int kfd_process_create_wq(void) |
19f6d2a6 OG |
135 | { |
136 | if (!kfd_process_wq) | |
fd320bf6 | 137 | kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0); |
1679ae8f FK |
138 | if (!kfd_restore_wq) |
139 | kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0); | |
140 | ||
141 | if (!kfd_process_wq || !kfd_restore_wq) { | |
142 | kfd_process_destroy_wq(); | |
143 | return -ENOMEM; | |
144 | } | |
145 | ||
146 | return 0; | |
19f6d2a6 OG |
147 | } |
148 | ||
149 | void kfd_process_destroy_wq(void) | |
150 | { | |
151 | if (kfd_process_wq) { | |
19f6d2a6 OG |
152 | destroy_workqueue(kfd_process_wq); |
153 | kfd_process_wq = NULL; | |
154 | } | |
1679ae8f FK |
155 | if (kfd_restore_wq) { |
156 | destroy_workqueue(kfd_restore_wq); | |
157 | kfd_restore_wq = NULL; | |
158 | } | |
19f6d2a6 OG |
159 | } |
160 | ||
f35751b8 FK |
161 | static void kfd_process_free_gpuvm(struct kgd_mem *mem, |
162 | struct kfd_process_device *pdd) | |
163 | { | |
164 | struct kfd_dev *dev = pdd->dev; | |
165 | ||
5b87245f AL |
166 | amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm); |
167 | amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem); | |
f35751b8 FK |
168 | } |
169 | ||
170 | /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process | |
171 | * This function should be only called right after the process | |
172 | * is created and when kfd_processes_mutex is still being held | |
173 | * to avoid concurrency. Because of that exclusiveness, we do | |
174 | * not need to take p->mutex. | |
175 | */ | |
176 | static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, | |
177 | uint64_t gpu_va, uint32_t size, | |
178 | uint32_t flags, void **kptr) | |
179 | { | |
180 | struct kfd_dev *kdev = pdd->dev; | |
181 | struct kgd_mem *mem = NULL; | |
182 | int handle; | |
183 | int err; | |
184 | ||
5b87245f | 185 | err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size, |
f35751b8 FK |
186 | pdd->vm, &mem, NULL, flags); |
187 | if (err) | |
188 | goto err_alloc_mem; | |
189 | ||
5b87245f | 190 | err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm); |
f35751b8 FK |
191 | if (err) |
192 | goto err_map_mem; | |
193 | ||
5b87245f | 194 | err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true); |
f35751b8 FK |
195 | if (err) { |
196 | pr_debug("Sync memory failed, wait interrupted by user signal\n"); | |
197 | goto sync_memory_failed; | |
198 | } | |
199 | ||
200 | /* Create an obj handle so kfd_process_device_remove_obj_handle | |
201 | * will take care of the bo removal when the process finishes. | |
202 | * We do not need to take p->mutex, because the process is just | |
203 | * created and the ioctls have not had the chance to run. | |
204 | */ | |
205 | handle = kfd_process_device_create_obj_handle(pdd, mem); | |
206 | ||
207 | if (handle < 0) { | |
208 | err = handle; | |
209 | goto free_gpuvm; | |
210 | } | |
211 | ||
212 | if (kptr) { | |
5b87245f | 213 | err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd, |
f35751b8 FK |
214 | (struct kgd_mem *)mem, kptr, NULL); |
215 | if (err) { | |
216 | pr_debug("Map GTT BO to kernel failed\n"); | |
217 | goto free_obj_handle; | |
218 | } | |
219 | } | |
220 | ||
221 | return err; | |
222 | ||
223 | free_obj_handle: | |
224 | kfd_process_device_remove_obj_handle(pdd, handle); | |
225 | free_gpuvm: | |
226 | sync_memory_failed: | |
227 | kfd_process_free_gpuvm(mem, pdd); | |
228 | return err; | |
229 | ||
230 | err_map_mem: | |
5b87245f | 231 | amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem); |
f35751b8 FK |
232 | err_alloc_mem: |
233 | *kptr = NULL; | |
234 | return err; | |
235 | } | |
236 | ||
552764b6 FK |
237 | /* kfd_process_device_reserve_ib_mem - Reserve memory inside the |
238 | * process for IB usage The memory reserved is for KFD to submit | |
239 | * IB to AMDGPU from kernel. If the memory is reserved | |
240 | * successfully, ib_kaddr will have the CPU/kernel | |
241 | * address. Check ib_kaddr before accessing the memory. | |
242 | */ | |
243 | static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) | |
244 | { | |
245 | struct qcm_process_device *qpd = &pdd->qpd; | |
246 | uint32_t flags = ALLOC_MEM_FLAGS_GTT | | |
247 | ALLOC_MEM_FLAGS_NO_SUBSTITUTE | | |
248 | ALLOC_MEM_FLAGS_WRITABLE | | |
249 | ALLOC_MEM_FLAGS_EXECUTABLE; | |
250 | void *kaddr; | |
251 | int ret; | |
252 | ||
253 | if (qpd->ib_kaddr || !qpd->ib_base) | |
254 | return 0; | |
255 | ||
256 | /* ib_base is only set for dGPU */ | |
257 | ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags, | |
258 | &kaddr); | |
259 | if (ret) | |
260 | return ret; | |
261 | ||
262 | qpd->ib_kaddr = kaddr; | |
263 | ||
264 | return 0; | |
265 | } | |
266 | ||
373d7080 | 267 | struct kfd_process *kfd_create_process(struct file *filep) |
19f6d2a6 OG |
268 | { |
269 | struct kfd_process *process; | |
373d7080 | 270 | struct task_struct *thread = current; |
de9f26bb | 271 | int ret; |
19f6d2a6 | 272 | |
4eacc26b | 273 | if (!thread->mm) |
19f6d2a6 OG |
274 | return ERR_PTR(-EINVAL); |
275 | ||
276 | /* Only the pthreads threading model is supported. */ | |
277 | if (thread->group_leader->mm != thread->mm) | |
278 | return ERR_PTR(-EINVAL); | |
279 | ||
19f6d2a6 OG |
280 | /* |
281 | * take kfd processes mutex before starting of process creation | |
282 | * so there won't be a case where two threads of the same process | |
283 | * create two kfd_process structures | |
284 | */ | |
285 | mutex_lock(&kfd_processes_mutex); | |
286 | ||
287 | /* A prior open of /dev/kfd could have already created the process. */ | |
288 | process = find_process(thread); | |
de9f26bb | 289 | if (process) { |
79775b62 | 290 | pr_debug("Process already found\n"); |
de9f26bb | 291 | } else { |
c0ede1f8 | 292 | process = create_process(thread, filep); |
19f6d2a6 | 293 | |
de9f26bb KR |
294 | if (!procfs.kobj) |
295 | goto out; | |
296 | ||
297 | process->kobj = kfd_alloc_struct(process->kobj); | |
298 | if (!process->kobj) { | |
299 | pr_warn("Creating procfs kobject failed"); | |
300 | goto out; | |
301 | } | |
302 | ret = kobject_init_and_add(process->kobj, &procfs_type, | |
303 | procfs.kobj, "%d", | |
304 | (int)process->lead_thread->pid); | |
305 | if (ret) { | |
306 | pr_warn("Creating procfs pid directory failed"); | |
307 | goto out; | |
308 | } | |
309 | ||
310 | process->attr_pasid.name = "pasid"; | |
311 | process->attr_pasid.mode = KFD_SYSFS_FILE_MODE; | |
312 | sysfs_attr_init(&process->attr_pasid); | |
313 | ret = sysfs_create_file(process->kobj, &process->attr_pasid); | |
314 | if (ret) | |
315 | pr_warn("Creating pasid for pid %d failed", | |
316 | (int)process->lead_thread->pid); | |
317 | } | |
318 | out: | |
19f6d2a6 OG |
319 | mutex_unlock(&kfd_processes_mutex); |
320 | ||
19f6d2a6 OG |
321 | return process; |
322 | } | |
323 | ||
324 | struct kfd_process *kfd_get_process(const struct task_struct *thread) | |
325 | { | |
326 | struct kfd_process *process; | |
327 | ||
4eacc26b | 328 | if (!thread->mm) |
19f6d2a6 OG |
329 | return ERR_PTR(-EINVAL); |
330 | ||
331 | /* Only the pthreads threading model is supported. */ | |
332 | if (thread->group_leader->mm != thread->mm) | |
333 | return ERR_PTR(-EINVAL); | |
334 | ||
335 | process = find_process(thread); | |
e47cb828 WL |
336 | if (!process) |
337 | return ERR_PTR(-EINVAL); | |
19f6d2a6 OG |
338 | |
339 | return process; | |
340 | } | |
341 | ||
342 | static struct kfd_process *find_process_by_mm(const struct mm_struct *mm) | |
343 | { | |
344 | struct kfd_process *process; | |
345 | ||
346 | hash_for_each_possible_rcu(kfd_processes_table, process, | |
347 | kfd_processes, (uintptr_t)mm) | |
348 | if (process->mm == mm) | |
349 | return process; | |
350 | ||
351 | return NULL; | |
352 | } | |
353 | ||
354 | static struct kfd_process *find_process(const struct task_struct *thread) | |
355 | { | |
356 | struct kfd_process *p; | |
357 | int idx; | |
358 | ||
359 | idx = srcu_read_lock(&kfd_processes_srcu); | |
360 | p = find_process_by_mm(thread->mm); | |
361 | srcu_read_unlock(&kfd_processes_srcu, idx); | |
362 | ||
363 | return p; | |
364 | } | |
365 | ||
abb208a8 FK |
366 | void kfd_unref_process(struct kfd_process *p) |
367 | { | |
368 | kref_put(&p->ref, kfd_process_ref_release); | |
369 | } | |
370 | ||
52b29d73 FK |
371 | static void kfd_process_device_free_bos(struct kfd_process_device *pdd) |
372 | { | |
373 | struct kfd_process *p = pdd->process; | |
374 | void *mem; | |
375 | int id; | |
376 | ||
377 | /* | |
378 | * Remove all handles from idr and release appropriate | |
379 | * local memory object | |
380 | */ | |
381 | idr_for_each_entry(&pdd->alloc_idr, mem, id) { | |
382 | struct kfd_process_device *peer_pdd; | |
383 | ||
384 | list_for_each_entry(peer_pdd, &p->per_device_data, | |
385 | per_device_list) { | |
386 | if (!peer_pdd->vm) | |
387 | continue; | |
5b87245f | 388 | amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( |
52b29d73 FK |
389 | peer_pdd->dev->kgd, mem, peer_pdd->vm); |
390 | } | |
391 | ||
5b87245f | 392 | amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem); |
52b29d73 FK |
393 | kfd_process_device_remove_obj_handle(pdd, id); |
394 | } | |
395 | } | |
396 | ||
397 | static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p) | |
398 | { | |
399 | struct kfd_process_device *pdd; | |
400 | ||
401 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) | |
402 | kfd_process_device_free_bos(pdd); | |
403 | } | |
404 | ||
de1450a5 | 405 | static void kfd_process_destroy_pdds(struct kfd_process *p) |
19f6d2a6 | 406 | { |
19f6d2a6 | 407 | struct kfd_process_device *pdd, *temp; |
19f6d2a6 | 408 | |
19f6d2a6 | 409 | list_for_each_entry_safe(pdd, temp, &p->per_device_data, |
de1450a5 FK |
410 | per_device_list) { |
411 | pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", | |
94a1ee09 OG |
412 | pdd->dev->id, p->pasid); |
413 | ||
bf47afba | 414 | if (pdd->drm_file) { |
5b87245f AL |
415 | amdgpu_amdkfd_gpuvm_release_process_vm( |
416 | pdd->dev->kgd, pdd->vm); | |
b84394e2 | 417 | fput(pdd->drm_file); |
bf47afba | 418 | } |
b84394e2 | 419 | else if (pdd->vm) |
5b87245f | 420 | amdgpu_amdkfd_gpuvm_destroy_process_vm( |
403575c4 FK |
421 | pdd->dev->kgd, pdd->vm); |
422 | ||
733fa1f7 | 423 | list_del(&pdd->per_device_list); |
373d7080 | 424 | |
f35751b8 | 425 | if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base) |
373d7080 FK |
426 | free_pages((unsigned long)pdd->qpd.cwsr_kaddr, |
427 | get_order(KFD_CWSR_TBA_TMA_SIZE)); | |
428 | ||
ef568db7 | 429 | kfree(pdd->qpd.doorbell_bitmap); |
52b29d73 FK |
430 | idr_destroy(&pdd->alloc_idr); |
431 | ||
19f6d2a6 OG |
432 | kfree(pdd); |
433 | } | |
de1450a5 FK |
434 | } |
435 | ||
436 | /* No process locking is needed in this function, because the process | |
437 | * is not findable any more. We must assume that no other thread is | |
438 | * using it any more, otherwise we couldn't safely free the process | |
439 | * structure in the end. | |
440 | */ | |
441 | static void kfd_process_wq_release(struct work_struct *work) | |
442 | { | |
443 | struct kfd_process *p = container_of(work, struct kfd_process, | |
444 | release_work); | |
de1450a5 | 445 | |
de9f26bb KR |
446 | /* Remove the procfs files */ |
447 | if (p->kobj) { | |
448 | sysfs_remove_file(p->kobj, &p->attr_pasid); | |
449 | kobject_del(p->kobj); | |
450 | kobject_put(p->kobj); | |
451 | p->kobj = NULL; | |
452 | } | |
453 | ||
64d1c3a4 | 454 | kfd_iommu_unbind_process(p); |
de1450a5 | 455 | |
52b29d73 FK |
456 | kfd_process_free_outstanding_kfd_bos(p); |
457 | ||
de1450a5 | 458 | kfd_process_destroy_pdds(p); |
403575c4 | 459 | dma_fence_put(p->ef); |
19f6d2a6 | 460 | |
f3a39818 AL |
461 | kfd_event_free_process(p); |
462 | ||
19f6d2a6 | 463 | kfd_pasid_free(p->pasid); |
a91e70e3 | 464 | kfd_free_process_doorbells(p); |
19f6d2a6 | 465 | |
19f6d2a6 OG |
466 | mutex_destroy(&p->mutex); |
467 | ||
c7b1243e FK |
468 | put_task_struct(p->lead_thread); |
469 | ||
19f6d2a6 | 470 | kfree(p); |
19f6d2a6 OG |
471 | } |
472 | ||
5ce10687 | 473 | static void kfd_process_ref_release(struct kref *ref) |
19f6d2a6 | 474 | { |
5ce10687 | 475 | struct kfd_process *p = container_of(ref, struct kfd_process, ref); |
19f6d2a6 | 476 | |
5ce10687 FK |
477 | INIT_WORK(&p->release_work, kfd_process_wq_release); |
478 | queue_work(kfd_process_wq, &p->release_work); | |
479 | } | |
19f6d2a6 | 480 | |
5ce10687 FK |
481 | static void kfd_process_destroy_delayed(struct rcu_head *rcu) |
482 | { | |
483 | struct kfd_process *p = container_of(rcu, struct kfd_process, rcu); | |
19f6d2a6 | 484 | |
abb208a8 | 485 | kfd_unref_process(p); |
19f6d2a6 OG |
486 | } |
487 | ||
488 | static void kfd_process_notifier_release(struct mmu_notifier *mn, | |
489 | struct mm_struct *mm) | |
490 | { | |
491 | struct kfd_process *p; | |
a82918f1 | 492 | struct kfd_process_device *pdd = NULL; |
19f6d2a6 OG |
493 | |
494 | /* | |
495 | * The kfd_process structure can not be free because the | |
496 | * mmu_notifier srcu is read locked | |
497 | */ | |
498 | p = container_of(mn, struct kfd_process, mmu_notifier); | |
32fa8219 FK |
499 | if (WARN_ON(p->mm != mm)) |
500 | return; | |
19f6d2a6 OG |
501 | |
502 | mutex_lock(&kfd_processes_mutex); | |
503 | hash_del_rcu(&p->kfd_processes); | |
504 | mutex_unlock(&kfd_processes_mutex); | |
505 | synchronize_srcu(&kfd_processes_srcu); | |
506 | ||
26103436 FK |
507 | cancel_delayed_work_sync(&p->eviction_work); |
508 | cancel_delayed_work_sync(&p->restore_work); | |
509 | ||
45102048 BG |
510 | mutex_lock(&p->mutex); |
511 | ||
062c5672 YS |
512 | /* Iterate over all process device data structures and if the |
513 | * pdd is in debug mode, we should first force unregistration, | |
514 | * then we will be able to destroy the queues | |
515 | */ | |
516 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { | |
517 | struct kfd_dev *dev = pdd->dev; | |
518 | ||
519 | mutex_lock(kfd_get_dbgmgr_mutex()); | |
520 | if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { | |
521 | if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { | |
522 | kfd_dbgmgr_destroy(dev->dbgmgr); | |
523 | dev->dbgmgr = NULL; | |
524 | } | |
525 | } | |
526 | mutex_unlock(kfd_get_dbgmgr_mutex()); | |
527 | } | |
528 | ||
9fd3f1bf | 529 | kfd_process_dequeue_from_all_devices(p); |
45102048 BG |
530 | pqm_uninit(&p->pqm); |
531 | ||
5ce10687 FK |
532 | /* Indicate to other users that MM is no longer valid */ |
533 | p->mm = NULL; | |
534 | ||
45102048 BG |
535 | mutex_unlock(&p->mutex); |
536 | ||
5ce10687 | 537 | mmu_notifier_unregister_no_release(&p->mmu_notifier, mm); |
19f6d2a6 OG |
538 | mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); |
539 | } | |
540 | ||
541 | static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { | |
542 | .release = kfd_process_notifier_release, | |
543 | }; | |
544 | ||
f35751b8 | 545 | static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) |
373d7080 | 546 | { |
373d7080 | 547 | unsigned long offset; |
f35751b8 | 548 | struct kfd_process_device *pdd; |
373d7080 | 549 | |
c0ede1f8 | 550 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { |
f35751b8 FK |
551 | struct kfd_dev *dev = pdd->dev; |
552 | struct qcm_process_device *qpd = &pdd->qpd; | |
553 | ||
554 | if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) | |
373d7080 | 555 | continue; |
f35751b8 | 556 | |
df03ef93 HK |
557 | offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id)) |
558 | << PAGE_SHIFT; | |
373d7080 FK |
559 | qpd->tba_addr = (int64_t)vm_mmap(filep, 0, |
560 | KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, | |
561 | MAP_SHARED, offset); | |
562 | ||
563 | if (IS_ERR_VALUE(qpd->tba_addr)) { | |
c0ede1f8 YZ |
564 | int err = qpd->tba_addr; |
565 | ||
566 | pr_err("Failure to set tba address. error %d.\n", err); | |
373d7080 FK |
567 | qpd->tba_addr = 0; |
568 | qpd->cwsr_kaddr = NULL; | |
c0ede1f8 | 569 | return err; |
373d7080 FK |
570 | } |
571 | ||
572 | memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); | |
573 | ||
574 | qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; | |
575 | pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", | |
576 | qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); | |
577 | } | |
c0ede1f8 YZ |
578 | |
579 | return 0; | |
373d7080 FK |
580 | } |
581 | ||
f35751b8 FK |
582 | static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) |
583 | { | |
584 | struct kfd_dev *dev = pdd->dev; | |
585 | struct qcm_process_device *qpd = &pdd->qpd; | |
586 | uint32_t flags = ALLOC_MEM_FLAGS_GTT | | |
587 | ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE; | |
588 | void *kaddr; | |
589 | int ret; | |
590 | ||
591 | if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) | |
592 | return 0; | |
593 | ||
594 | /* cwsr_base is only set for dGPU */ | |
595 | ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base, | |
596 | KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr); | |
597 | if (ret) | |
598 | return ret; | |
599 | ||
600 | qpd->cwsr_kaddr = kaddr; | |
601 | qpd->tba_addr = qpd->cwsr_base; | |
602 | ||
603 | memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); | |
604 | ||
605 | qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; | |
606 | pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", | |
607 | qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); | |
608 | ||
609 | return 0; | |
610 | } | |
611 | ||
c0ede1f8 YZ |
612 | static struct kfd_process *create_process(const struct task_struct *thread, |
613 | struct file *filep) | |
19f6d2a6 OG |
614 | { |
615 | struct kfd_process *process; | |
616 | int err = -ENOMEM; | |
617 | ||
618 | process = kzalloc(sizeof(*process), GFP_KERNEL); | |
619 | ||
620 | if (!process) | |
621 | goto err_alloc_process; | |
622 | ||
19f6d2a6 OG |
623 | process->pasid = kfd_pasid_alloc(); |
624 | if (process->pasid == 0) | |
625 | goto err_alloc_pasid; | |
626 | ||
a91e70e3 FK |
627 | if (kfd_alloc_process_doorbells(process) < 0) |
628 | goto err_alloc_doorbells; | |
629 | ||
5ce10687 FK |
630 | kref_init(&process->ref); |
631 | ||
19f6d2a6 OG |
632 | mutex_init(&process->mutex); |
633 | ||
634 | process->mm = thread->mm; | |
635 | ||
636 | /* register notifier */ | |
637 | process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; | |
c0ede1f8 | 638 | err = mmu_notifier_register(&process->mmu_notifier, process->mm); |
19f6d2a6 OG |
639 | if (err) |
640 | goto err_mmu_notifier; | |
641 | ||
642 | hash_add_rcu(kfd_processes_table, &process->kfd_processes, | |
643 | (uintptr_t)process->mm); | |
644 | ||
645 | process->lead_thread = thread->group_leader; | |
c7b1243e | 646 | get_task_struct(process->lead_thread); |
19f6d2a6 | 647 | |
19f6d2a6 OG |
648 | INIT_LIST_HEAD(&process->per_device_data); |
649 | ||
f3a39818 AL |
650 | kfd_event_init_process(process); |
651 | ||
45102048 BG |
652 | err = pqm_init(&process->pqm, process); |
653 | if (err != 0) | |
654 | goto err_process_pqm_init; | |
655 | ||
dd59239a | 656 | /* init process apertures*/ |
10f1685f | 657 | process->is_32bit_user_mode = in_compat_syscall(); |
b312b2b2 DC |
658 | err = kfd_init_apertures(process); |
659 | if (err != 0) | |
7a10d63f | 660 | goto err_init_apertures; |
dd59239a | 661 | |
26103436 FK |
662 | INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); |
663 | INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); | |
664 | process->last_restore_timestamp = get_jiffies_64(); | |
665 | ||
f35751b8 | 666 | err = kfd_process_init_cwsr_apu(process, filep); |
c0ede1f8 YZ |
667 | if (err) |
668 | goto err_init_cwsr; | |
669 | ||
19f6d2a6 OG |
670 | return process; |
671 | ||
c0ede1f8 | 672 | err_init_cwsr: |
52b29d73 | 673 | kfd_process_free_outstanding_kfd_bos(process); |
c0ede1f8 | 674 | kfd_process_destroy_pdds(process); |
7a10d63f | 675 | err_init_apertures: |
dd59239a | 676 | pqm_uninit(&process->pqm); |
45102048 BG |
677 | err_process_pqm_init: |
678 | hash_del_rcu(&process->kfd_processes); | |
679 | synchronize_rcu(); | |
680 | mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm); | |
19f6d2a6 | 681 | err_mmu_notifier: |
7fd5e03c | 682 | mutex_destroy(&process->mutex); |
a91e70e3 FK |
683 | kfd_free_process_doorbells(process); |
684 | err_alloc_doorbells: | |
19f6d2a6 OG |
685 | kfd_pasid_free(process->pasid); |
686 | err_alloc_pasid: | |
19f6d2a6 OG |
687 | kfree(process); |
688 | err_alloc_process: | |
689 | return ERR_PTR(err); | |
690 | } | |
691 | ||
ef568db7 FK |
692 | static int init_doorbell_bitmap(struct qcm_process_device *qpd, |
693 | struct kfd_dev *dev) | |
694 | { | |
695 | unsigned int i; | |
696 | ||
697 | if (!KFD_IS_SOC15(dev->device_info->asic_family)) | |
698 | return 0; | |
699 | ||
700 | qpd->doorbell_bitmap = | |
701 | kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, | |
702 | BITS_PER_BYTE), GFP_KERNEL); | |
703 | if (!qpd->doorbell_bitmap) | |
704 | return -ENOMEM; | |
705 | ||
1f86805a YZ |
706 | /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */ |
707 | for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) { | |
708 | if (i >= dev->shared_resources.non_cp_doorbells_start | |
709 | && i <= dev->shared_resources.non_cp_doorbells_end) { | |
ef568db7 | 710 | set_bit(i, qpd->doorbell_bitmap); |
1f86805a YZ |
711 | set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, |
712 | qpd->doorbell_bitmap); | |
713 | pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i, | |
714 | i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET); | |
ef568db7 | 715 | } |
1f86805a | 716 | } |
ef568db7 FK |
717 | |
718 | return 0; | |
719 | } | |
720 | ||
19f6d2a6 | 721 | struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, |
093c7d8c | 722 | struct kfd_process *p) |
19f6d2a6 OG |
723 | { |
724 | struct kfd_process_device *pdd = NULL; | |
725 | ||
726 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) | |
727 | if (pdd->dev == dev) | |
733fa1f7 | 728 | return pdd; |
093c7d8c | 729 | |
733fa1f7 | 730 | return NULL; |
093c7d8c AS |
731 | } |
732 | ||
733 | struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, | |
734 | struct kfd_process *p) | |
735 | { | |
736 | struct kfd_process_device *pdd = NULL; | |
737 | ||
738 | pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); | |
2d9b36f9 FK |
739 | if (!pdd) |
740 | return NULL; | |
741 | ||
ef568db7 FK |
742 | if (init_doorbell_bitmap(&pdd->qpd, dev)) { |
743 | pr_err("Failed to init doorbell for process\n"); | |
744 | kfree(pdd); | |
745 | return NULL; | |
746 | } | |
747 | ||
2d9b36f9 FK |
748 | pdd->dev = dev; |
749 | INIT_LIST_HEAD(&pdd->qpd.queues_list); | |
750 | INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); | |
751 | pdd->qpd.dqm = dev->dqm; | |
752 | pdd->qpd.pqm = &p->pqm; | |
26103436 | 753 | pdd->qpd.evicted = 0; |
2d9b36f9 FK |
754 | pdd->process = p; |
755 | pdd->bound = PDD_UNBOUND; | |
756 | pdd->already_dequeued = false; | |
757 | list_add(&pdd->per_device_list, &p->per_device_data); | |
19f6d2a6 | 758 | |
52b29d73 FK |
759 | /* Init idr used for memory handle translation */ |
760 | idr_init(&pdd->alloc_idr); | |
761 | ||
b84394e2 FK |
762 | return pdd; |
763 | } | |
764 | ||
765 | /** | |
766 | * kfd_process_device_init_vm - Initialize a VM for a process-device | |
767 | * | |
768 | * @pdd: The process-device | |
769 | * @drm_file: Optional pointer to a DRM file descriptor | |
770 | * | |
771 | * If @drm_file is specified, it will be used to acquire the VM from | |
772 | * that file descriptor. If successful, the @pdd takes ownership of | |
773 | * the file descriptor. | |
774 | * | |
775 | * If @drm_file is NULL, a new VM is created. | |
776 | * | |
777 | * Returns 0 on success, -errno on failure. | |
778 | */ | |
779 | int kfd_process_device_init_vm(struct kfd_process_device *pdd, | |
780 | struct file *drm_file) | |
781 | { | |
782 | struct kfd_process *p; | |
783 | struct kfd_dev *dev; | |
784 | int ret; | |
785 | ||
786 | if (pdd->vm) | |
787 | return drm_file ? -EBUSY : 0; | |
788 | ||
789 | p = pdd->process; | |
790 | dev = pdd->dev; | |
791 | ||
792 | if (drm_file) | |
5b87245f | 793 | ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( |
1685b01a | 794 | dev->kgd, drm_file, p->pasid, |
b84394e2 FK |
795 | &pdd->vm, &p->kgd_process_info, &p->ef); |
796 | else | |
5b87245f AL |
797 | ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid, |
798 | &pdd->vm, &p->kgd_process_info, &p->ef); | |
b84394e2 | 799 | if (ret) { |
403575c4 | 800 | pr_err("Failed to create process VM object\n"); |
b84394e2 | 801 | return ret; |
403575c4 | 802 | } |
403575c4 | 803 | |
f40c6912 YZ |
804 | amdgpu_vm_set_task_info(pdd->vm); |
805 | ||
552764b6 FK |
806 | ret = kfd_process_device_reserve_ib_mem(pdd); |
807 | if (ret) | |
808 | goto err_reserve_ib_mem; | |
f35751b8 FK |
809 | ret = kfd_process_device_init_cwsr_dgpu(pdd); |
810 | if (ret) | |
811 | goto err_init_cwsr; | |
812 | ||
b84394e2 FK |
813 | pdd->drm_file = drm_file; |
814 | ||
815 | return 0; | |
f35751b8 FK |
816 | |
817 | err_init_cwsr: | |
552764b6 | 818 | err_reserve_ib_mem: |
f35751b8 FK |
819 | kfd_process_device_free_bos(pdd); |
820 | if (!drm_file) | |
5b87245f | 821 | amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm); |
f35751b8 FK |
822 | pdd->vm = NULL; |
823 | ||
824 | return ret; | |
19f6d2a6 OG |
825 | } |
826 | ||
827 | /* | |
828 | * Direct the IOMMU to bind the process (specifically the pasid->mm) | |
829 | * to the device. | |
830 | * Unbinding occurs when the process dies or the device is removed. | |
831 | * | |
832 | * Assumes that the process lock is held. | |
833 | */ | |
834 | struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, | |
835 | struct kfd_process *p) | |
836 | { | |
093c7d8c | 837 | struct kfd_process_device *pdd; |
b17f068a | 838 | int err; |
19f6d2a6 | 839 | |
093c7d8c AS |
840 | pdd = kfd_get_process_device_data(dev, p); |
841 | if (!pdd) { | |
842 | pr_err("Process device data doesn't exist\n"); | |
19f6d2a6 | 843 | return ERR_PTR(-ENOMEM); |
093c7d8c | 844 | } |
19f6d2a6 | 845 | |
64d1c3a4 FK |
846 | err = kfd_iommu_bind_process_to_device(pdd); |
847 | if (err) | |
b17f068a OG |
848 | return ERR_PTR(err); |
849 | ||
b84394e2 FK |
850 | err = kfd_process_device_init_vm(pdd, NULL); |
851 | if (err) | |
852 | return ERR_PTR(err); | |
853 | ||
19f6d2a6 OG |
854 | return pdd; |
855 | } | |
856 | ||
8eabaf54 KR |
857 | struct kfd_process_device *kfd_get_first_process_device_data( |
858 | struct kfd_process *p) | |
19f6d2a6 OG |
859 | { |
860 | return list_first_entry(&p->per_device_data, | |
861 | struct kfd_process_device, | |
862 | per_device_list); | |
863 | } | |
864 | ||
8eabaf54 KR |
865 | struct kfd_process_device *kfd_get_next_process_device_data( |
866 | struct kfd_process *p, | |
19f6d2a6 OG |
867 | struct kfd_process_device *pdd) |
868 | { | |
869 | if (list_is_last(&pdd->per_device_list, &p->per_device_data)) | |
870 | return NULL; | |
871 | return list_next_entry(pdd, per_device_list); | |
872 | } | |
873 | ||
874 | bool kfd_has_process_device_data(struct kfd_process *p) | |
875 | { | |
876 | return !(list_empty(&p->per_device_data)); | |
877 | } | |
f3a39818 | 878 | |
52b29d73 FK |
879 | /* Create specific handle mapped to mem from process local memory idr |
880 | * Assumes that the process lock is held. | |
881 | */ | |
882 | int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, | |
883 | void *mem) | |
884 | { | |
885 | return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL); | |
886 | } | |
887 | ||
888 | /* Translate specific handle from process local memory idr | |
889 | * Assumes that the process lock is held. | |
890 | */ | |
891 | void *kfd_process_device_translate_handle(struct kfd_process_device *pdd, | |
892 | int handle) | |
893 | { | |
894 | if (handle < 0) | |
895 | return NULL; | |
896 | ||
897 | return idr_find(&pdd->alloc_idr, handle); | |
898 | } | |
899 | ||
900 | /* Remove specific handle from process local memory idr | |
901 | * Assumes that the process lock is held. | |
902 | */ | |
903 | void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, | |
904 | int handle) | |
905 | { | |
906 | if (handle >= 0) | |
907 | idr_remove(&pdd->alloc_idr, handle); | |
908 | } | |
909 | ||
abb208a8 | 910 | /* This increments the process->ref counter. */ |
f3a39818 AL |
911 | struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) |
912 | { | |
82c16b42 | 913 | struct kfd_process *p, *ret_p = NULL; |
f3a39818 AL |
914 | unsigned int temp; |
915 | ||
916 | int idx = srcu_read_lock(&kfd_processes_srcu); | |
917 | ||
918 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | |
919 | if (p->pasid == pasid) { | |
abb208a8 | 920 | kref_get(&p->ref); |
82c16b42 | 921 | ret_p = p; |
f3a39818 AL |
922 | break; |
923 | } | |
924 | } | |
925 | ||
926 | srcu_read_unlock(&kfd_processes_srcu, idx); | |
927 | ||
82c16b42 | 928 | return ret_p; |
f3a39818 | 929 | } |
373d7080 | 930 | |
26103436 FK |
931 | /* This increments the process->ref counter. */ |
932 | struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) | |
933 | { | |
934 | struct kfd_process *p; | |
935 | ||
936 | int idx = srcu_read_lock(&kfd_processes_srcu); | |
937 | ||
938 | p = find_process_by_mm(mm); | |
939 | if (p) | |
940 | kref_get(&p->ref); | |
941 | ||
942 | srcu_read_unlock(&kfd_processes_srcu, idx); | |
943 | ||
944 | return p; | |
945 | } | |
946 | ||
947 | /* process_evict_queues - Evict all user queues of a process | |
948 | * | |
949 | * Eviction is reference-counted per process-device. This means multiple | |
950 | * evictions from different sources can be nested safely. | |
951 | */ | |
6b95e797 | 952 | int kfd_process_evict_queues(struct kfd_process *p) |
26103436 FK |
953 | { |
954 | struct kfd_process_device *pdd; | |
955 | int r = 0; | |
956 | unsigned int n_evicted = 0; | |
957 | ||
958 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { | |
959 | r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, | |
960 | &pdd->qpd); | |
961 | if (r) { | |
962 | pr_err("Failed to evict process queues\n"); | |
963 | goto fail; | |
964 | } | |
965 | n_evicted++; | |
966 | } | |
967 | ||
968 | return r; | |
969 | ||
970 | fail: | |
971 | /* To keep state consistent, roll back partial eviction by | |
972 | * restoring queues | |
973 | */ | |
974 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { | |
975 | if (n_evicted == 0) | |
976 | break; | |
977 | if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, | |
978 | &pdd->qpd)) | |
979 | pr_err("Failed to restore queues\n"); | |
980 | ||
981 | n_evicted--; | |
982 | } | |
983 | ||
984 | return r; | |
985 | } | |
986 | ||
987 | /* process_restore_queues - Restore all user queues of a process */ | |
6b95e797 | 988 | int kfd_process_restore_queues(struct kfd_process *p) |
26103436 FK |
989 | { |
990 | struct kfd_process_device *pdd; | |
991 | int r, ret = 0; | |
992 | ||
993 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { | |
994 | r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, | |
995 | &pdd->qpd); | |
996 | if (r) { | |
997 | pr_err("Failed to restore process queues\n"); | |
998 | if (!ret) | |
999 | ret = r; | |
1000 | } | |
1001 | } | |
1002 | ||
1003 | return ret; | |
1004 | } | |
1005 | ||
1006 | static void evict_process_worker(struct work_struct *work) | |
1007 | { | |
1008 | int ret; | |
1009 | struct kfd_process *p; | |
1010 | struct delayed_work *dwork; | |
1011 | ||
1012 | dwork = to_delayed_work(work); | |
1013 | ||
1014 | /* Process termination destroys this worker thread. So during the | |
1015 | * lifetime of this thread, kfd_process p will be valid | |
1016 | */ | |
1017 | p = container_of(dwork, struct kfd_process, eviction_work); | |
1018 | WARN_ONCE(p->last_eviction_seqno != p->ef->seqno, | |
1019 | "Eviction fence mismatch\n"); | |
1020 | ||
1021 | /* Narrow window of overlap between restore and evict work | |
1022 | * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos | |
1023 | * unreserves KFD BOs, it is possible to evicted again. But | |
1024 | * restore has few more steps of finish. So lets wait for any | |
1025 | * previous restore work to complete | |
1026 | */ | |
1027 | flush_delayed_work(&p->restore_work); | |
1028 | ||
1029 | pr_debug("Started evicting pasid %d\n", p->pasid); | |
6b95e797 | 1030 | ret = kfd_process_evict_queues(p); |
26103436 FK |
1031 | if (!ret) { |
1032 | dma_fence_signal(p->ef); | |
1033 | dma_fence_put(p->ef); | |
1034 | p->ef = NULL; | |
1679ae8f | 1035 | queue_delayed_work(kfd_restore_wq, &p->restore_work, |
26103436 FK |
1036 | msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); |
1037 | ||
1038 | pr_debug("Finished evicting pasid %d\n", p->pasid); | |
1039 | } else | |
1040 | pr_err("Failed to evict queues of pasid %d\n", p->pasid); | |
1041 | } | |
1042 | ||
1043 | static void restore_process_worker(struct work_struct *work) | |
1044 | { | |
1045 | struct delayed_work *dwork; | |
1046 | struct kfd_process *p; | |
26103436 FK |
1047 | int ret = 0; |
1048 | ||
1049 | dwork = to_delayed_work(work); | |
1050 | ||
1051 | /* Process termination destroys this worker thread. So during the | |
1052 | * lifetime of this thread, kfd_process p will be valid | |
1053 | */ | |
1054 | p = container_of(dwork, struct kfd_process, restore_work); | |
26103436 FK |
1055 | pr_debug("Started restoring pasid %d\n", p->pasid); |
1056 | ||
1057 | /* Setting last_restore_timestamp before successful restoration. | |
1058 | * Otherwise this would have to be set by KGD (restore_process_bos) | |
1059 | * before KFD BOs are unreserved. If not, the process can be evicted | |
1060 | * again before the timestamp is set. | |
1061 | * If restore fails, the timestamp will be set again in the next | |
1062 | * attempt. This would mean that the minimum GPU quanta would be | |
1063 | * PROCESS_ACTIVE_TIME_MS - (time to execute the following two | |
1064 | * functions) | |
1065 | */ | |
1066 | ||
1067 | p->last_restore_timestamp = get_jiffies_64(); | |
5b87245f | 1068 | ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info, |
26103436 FK |
1069 | &p->ef); |
1070 | if (ret) { | |
1071 | pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n", | |
1072 | p->pasid, PROCESS_BACK_OFF_TIME_MS); | |
1679ae8f | 1073 | ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, |
26103436 FK |
1074 | msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); |
1075 | WARN(!ret, "reschedule restore work failed\n"); | |
1076 | return; | |
1077 | } | |
1078 | ||
6b95e797 | 1079 | ret = kfd_process_restore_queues(p); |
26103436 FK |
1080 | if (!ret) |
1081 | pr_debug("Finished restoring pasid %d\n", p->pasid); | |
1082 | else | |
1083 | pr_err("Failed to restore queues of pasid %d\n", p->pasid); | |
1084 | } | |
1085 | ||
1086 | void kfd_suspend_all_processes(void) | |
1087 | { | |
1088 | struct kfd_process *p; | |
1089 | unsigned int temp; | |
1090 | int idx = srcu_read_lock(&kfd_processes_srcu); | |
1091 | ||
1092 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | |
1093 | cancel_delayed_work_sync(&p->eviction_work); | |
1094 | cancel_delayed_work_sync(&p->restore_work); | |
1095 | ||
6b95e797 | 1096 | if (kfd_process_evict_queues(p)) |
26103436 FK |
1097 | pr_err("Failed to suspend process %d\n", p->pasid); |
1098 | dma_fence_signal(p->ef); | |
1099 | dma_fence_put(p->ef); | |
1100 | p->ef = NULL; | |
1101 | } | |
1102 | srcu_read_unlock(&kfd_processes_srcu, idx); | |
1103 | } | |
1104 | ||
1105 | int kfd_resume_all_processes(void) | |
1106 | { | |
1107 | struct kfd_process *p; | |
1108 | unsigned int temp; | |
1109 | int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); | |
1110 | ||
1111 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | |
1679ae8f | 1112 | if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) { |
26103436 FK |
1113 | pr_err("Restore process %d failed during resume\n", |
1114 | p->pasid); | |
1115 | ret = -EFAULT; | |
1116 | } | |
1117 | } | |
1118 | srcu_read_unlock(&kfd_processes_srcu, idx); | |
1119 | return ret; | |
1120 | } | |
1121 | ||
df03ef93 | 1122 | int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, |
373d7080 FK |
1123 | struct vm_area_struct *vma) |
1124 | { | |
373d7080 FK |
1125 | struct kfd_process_device *pdd; |
1126 | struct qcm_process_device *qpd; | |
1127 | ||
373d7080 FK |
1128 | if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { |
1129 | pr_err("Incorrect CWSR mapping size.\n"); | |
1130 | return -EINVAL; | |
1131 | } | |
1132 | ||
1133 | pdd = kfd_get_process_device_data(dev, process); | |
1134 | if (!pdd) | |
1135 | return -EINVAL; | |
1136 | qpd = &pdd->qpd; | |
1137 | ||
1138 | qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | |
1139 | get_order(KFD_CWSR_TBA_TMA_SIZE)); | |
1140 | if (!qpd->cwsr_kaddr) { | |
1141 | pr_err("Error allocating per process CWSR buffer.\n"); | |
1142 | return -ENOMEM; | |
1143 | } | |
1144 | ||
1145 | vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | |
1146 | | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; | |
1147 | /* Mapping pages to user process */ | |
1148 | return remap_pfn_range(vma, vma->vm_start, | |
1149 | PFN_DOWN(__pa(qpd->cwsr_kaddr)), | |
1150 | KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); | |
1151 | } | |
851a645e | 1152 | |
403575c4 FK |
1153 | void kfd_flush_tlb(struct kfd_process_device *pdd) |
1154 | { | |
1155 | struct kfd_dev *dev = pdd->dev; | |
1156 | const struct kfd2kgd_calls *f2g = dev->kfd2kgd; | |
1157 | ||
1158 | if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { | |
1159 | /* Nothing to flush until a VMID is assigned, which | |
1160 | * only happens when the first queue is created. | |
1161 | */ | |
1162 | if (pdd->qpd.vmid) | |
1163 | f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid); | |
1164 | } else { | |
1165 | f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid); | |
1166 | } | |
1167 | } | |
1168 | ||
851a645e FK |
1169 | #if defined(CONFIG_DEBUG_FS) |
1170 | ||
1171 | int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) | |
1172 | { | |
1173 | struct kfd_process *p; | |
1174 | unsigned int temp; | |
1175 | int r = 0; | |
1176 | ||
1177 | int idx = srcu_read_lock(&kfd_processes_srcu); | |
1178 | ||
1179 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | |
1180 | seq_printf(m, "Process %d PASID %d:\n", | |
1181 | p->lead_thread->tgid, p->pasid); | |
1182 | ||
1183 | mutex_lock(&p->mutex); | |
1184 | r = pqm_debugfs_mqds(m, &p->pqm); | |
1185 | mutex_unlock(&p->mutex); | |
1186 | ||
1187 | if (r) | |
1188 | break; | |
1189 | } | |
1190 | ||
1191 | srcu_read_unlock(&kfd_processes_srcu, idx); | |
1192 | ||
1193 | return r; | |
1194 | } | |
1195 | ||
1196 | #endif | |
14328aa5 | 1197 |