Commit | Line | Data |
---|---|---|
19f6d2a6 OG |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | ||
23 | #include <linux/mutex.h> | |
24 | #include <linux/log2.h> | |
25 | #include <linux/sched.h> | |
26 | #include <linux/slab.h> | |
b17f068a | 27 | #include <linux/amd-iommu.h> |
19f6d2a6 | 28 | #include <linux/notifier.h> |
dd59239a AS |
29 | #include <linux/compat.h> |
30 | ||
19f6d2a6 OG |
31 | struct mm_struct; |
32 | ||
33 | #include "kfd_priv.h" | |
c3447e81 | 34 | #include "kfd_dbgmgr.h" |
19f6d2a6 OG |
35 | |
36 | /* | |
37 | * Initial size for the array of queues. | |
38 | * The allocated size is doubled each time | |
39 | * it is exceeded up to MAX_PROCESS_QUEUES. | |
40 | */ | |
41 | #define INITIAL_QUEUE_ARRAY_SIZE 16 | |
42 | ||
43 | /* | |
44 | * List of struct kfd_process (field kfd_process). | |
45 | * Unique/indexed by mm_struct* | |
46 | */ | |
47 | #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ | |
48 | static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); | |
49 | static DEFINE_MUTEX(kfd_processes_mutex); | |
50 | ||
51 | DEFINE_STATIC_SRCU(kfd_processes_srcu); | |
52 | ||
53 | static struct workqueue_struct *kfd_process_wq; | |
54 | ||
55 | struct kfd_process_release_work { | |
56 | struct work_struct kfd_work; | |
57 | struct kfd_process *p; | |
58 | }; | |
59 | ||
/* Forward declarations of helpers defined further down in this file. */
static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);
62 | ||
63 | void kfd_process_create_wq(void) | |
64 | { | |
65 | if (!kfd_process_wq) | |
66 | kfd_process_wq = create_workqueue("kfd_process_wq"); | |
67 | } | |
68 | ||
69 | void kfd_process_destroy_wq(void) | |
70 | { | |
71 | if (kfd_process_wq) { | |
72 | flush_workqueue(kfd_process_wq); | |
73 | destroy_workqueue(kfd_process_wq); | |
74 | kfd_process_wq = NULL; | |
75 | } | |
76 | } | |
77 | ||
78 | struct kfd_process *kfd_create_process(const struct task_struct *thread) | |
79 | { | |
80 | struct kfd_process *process; | |
81 | ||
82 | BUG_ON(!kfd_process_wq); | |
83 | ||
84 | if (thread->mm == NULL) | |
85 | return ERR_PTR(-EINVAL); | |
86 | ||
87 | /* Only the pthreads threading model is supported. */ | |
88 | if (thread->group_leader->mm != thread->mm) | |
89 | return ERR_PTR(-EINVAL); | |
90 | ||
91 | /* Take mmap_sem because we call __mmu_notifier_register inside */ | |
92 | down_write(&thread->mm->mmap_sem); | |
93 | ||
94 | /* | |
95 | * take kfd processes mutex before starting of process creation | |
96 | * so there won't be a case where two threads of the same process | |
97 | * create two kfd_process structures | |
98 | */ | |
99 | mutex_lock(&kfd_processes_mutex); | |
100 | ||
101 | /* A prior open of /dev/kfd could have already created the process. */ | |
102 | process = find_process(thread); | |
103 | if (process) | |
104 | pr_debug("kfd: process already found\n"); | |
105 | ||
106 | if (!process) | |
107 | process = create_process(thread); | |
108 | ||
109 | mutex_unlock(&kfd_processes_mutex); | |
110 | ||
111 | up_write(&thread->mm->mmap_sem); | |
112 | ||
113 | return process; | |
114 | } | |
115 | ||
116 | struct kfd_process *kfd_get_process(const struct task_struct *thread) | |
117 | { | |
118 | struct kfd_process *process; | |
119 | ||
120 | if (thread->mm == NULL) | |
121 | return ERR_PTR(-EINVAL); | |
122 | ||
123 | /* Only the pthreads threading model is supported. */ | |
124 | if (thread->group_leader->mm != thread->mm) | |
125 | return ERR_PTR(-EINVAL); | |
126 | ||
127 | process = find_process(thread); | |
128 | ||
129 | return process; | |
130 | } | |
131 | ||
132 | static struct kfd_process *find_process_by_mm(const struct mm_struct *mm) | |
133 | { | |
134 | struct kfd_process *process; | |
135 | ||
136 | hash_for_each_possible_rcu(kfd_processes_table, process, | |
137 | kfd_processes, (uintptr_t)mm) | |
138 | if (process->mm == mm) | |
139 | return process; | |
140 | ||
141 | return NULL; | |
142 | } | |
143 | ||
144 | static struct kfd_process *find_process(const struct task_struct *thread) | |
145 | { | |
146 | struct kfd_process *p; | |
147 | int idx; | |
148 | ||
149 | idx = srcu_read_lock(&kfd_processes_srcu); | |
150 | p = find_process_by_mm(thread->mm); | |
151 | srcu_read_unlock(&kfd_processes_srcu, idx); | |
152 | ||
153 | return p; | |
154 | } | |
155 | ||
156 | static void kfd_process_wq_release(struct work_struct *work) | |
157 | { | |
158 | struct kfd_process_release_work *my_work; | |
159 | struct kfd_process_device *pdd, *temp; | |
160 | struct kfd_process *p; | |
161 | ||
162 | my_work = (struct kfd_process_release_work *) work; | |
163 | ||
164 | p = my_work->p; | |
165 | ||
94a1ee09 OG |
166 | pr_debug("Releasing process (pasid %d) in workqueue\n", |
167 | p->pasid); | |
168 | ||
19f6d2a6 OG |
169 | mutex_lock(&p->mutex); |
170 | ||
171 | list_for_each_entry_safe(pdd, temp, &p->per_device_data, | |
172 | per_device_list) { | |
94a1ee09 OG |
173 | pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", |
174 | pdd->dev->id, p->pasid); | |
175 | ||
a82918f1 | 176 | if (pdd->reset_wavefronts) |
c3447e81 BG |
177 | dbgdev_wave_reset_wavefronts(pdd->dev, p); |
178 | ||
b17f068a | 179 | amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); |
19f6d2a6 OG |
180 | list_del(&pdd->per_device_list); |
181 | ||
182 | kfree(pdd); | |
183 | } | |
184 | ||
f3a39818 AL |
185 | kfd_event_free_process(p); |
186 | ||
19f6d2a6 OG |
187 | kfd_pasid_free(p->pasid); |
188 | ||
189 | mutex_unlock(&p->mutex); | |
190 | ||
191 | mutex_destroy(&p->mutex); | |
192 | ||
193 | kfree(p->queues); | |
194 | ||
195 | kfree(p); | |
196 | ||
642f0f2a | 197 | kfree(work); |
19f6d2a6 OG |
198 | } |
199 | ||
200 | static void kfd_process_destroy_delayed(struct rcu_head *rcu) | |
201 | { | |
202 | struct kfd_process_release_work *work; | |
203 | struct kfd_process *p; | |
204 | ||
205 | BUG_ON(!kfd_process_wq); | |
206 | ||
207 | p = container_of(rcu, struct kfd_process, rcu); | |
208 | BUG_ON(atomic_read(&p->mm->mm_count) <= 0); | |
209 | ||
210 | mmdrop(p->mm); | |
211 | ||
1549fcd1 | 212 | work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC); |
19f6d2a6 OG |
213 | |
214 | if (work) { | |
215 | INIT_WORK((struct work_struct *) work, kfd_process_wq_release); | |
216 | work->p = p; | |
217 | queue_work(kfd_process_wq, (struct work_struct *) work); | |
218 | } | |
219 | } | |
220 | ||
221 | static void kfd_process_notifier_release(struct mmu_notifier *mn, | |
222 | struct mm_struct *mm) | |
223 | { | |
224 | struct kfd_process *p; | |
a82918f1 | 225 | struct kfd_process_device *pdd = NULL; |
19f6d2a6 OG |
226 | |
227 | /* | |
228 | * The kfd_process structure can not be free because the | |
229 | * mmu_notifier srcu is read locked | |
230 | */ | |
231 | p = container_of(mn, struct kfd_process, mmu_notifier); | |
232 | BUG_ON(p->mm != mm); | |
233 | ||
234 | mutex_lock(&kfd_processes_mutex); | |
235 | hash_del_rcu(&p->kfd_processes); | |
236 | mutex_unlock(&kfd_processes_mutex); | |
237 | synchronize_srcu(&kfd_processes_srcu); | |
238 | ||
45102048 BG |
239 | mutex_lock(&p->mutex); |
240 | ||
241 | /* In case our notifier is called before IOMMU notifier */ | |
242 | pqm_uninit(&p->pqm); | |
243 | ||
a82918f1 BG |
244 | /* Iterate over all process device data structure and check |
245 | * if we should reset all wavefronts */ | |
246 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) | |
247 | if (pdd->reset_wavefronts) { | |
248 | pr_warn("amdkfd: Resetting all wave fronts\n"); | |
249 | dbgdev_wave_reset_wavefronts(pdd->dev, p); | |
250 | pdd->reset_wavefronts = false; | |
251 | } | |
252 | ||
45102048 BG |
253 | mutex_unlock(&p->mutex); |
254 | ||
19f6d2a6 OG |
255 | /* |
256 | * Because we drop mm_count inside kfd_process_destroy_delayed | |
257 | * and because the mmu_notifier_unregister function also drop | |
258 | * mm_count we need to take an extra count here. | |
259 | */ | |
260 | atomic_inc(&p->mm->mm_count); | |
261 | mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm); | |
262 | mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); | |
263 | } | |
264 | ||
265 | static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { | |
266 | .release = kfd_process_notifier_release, | |
267 | }; | |
268 | ||
269 | static struct kfd_process *create_process(const struct task_struct *thread) | |
270 | { | |
271 | struct kfd_process *process; | |
272 | int err = -ENOMEM; | |
273 | ||
274 | process = kzalloc(sizeof(*process), GFP_KERNEL); | |
275 | ||
276 | if (!process) | |
277 | goto err_alloc_process; | |
278 | ||
279 | process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE, | |
280 | sizeof(process->queues[0]), GFP_KERNEL); | |
281 | if (!process->queues) | |
282 | goto err_alloc_queues; | |
283 | ||
284 | process->pasid = kfd_pasid_alloc(); | |
285 | if (process->pasid == 0) | |
286 | goto err_alloc_pasid; | |
287 | ||
288 | mutex_init(&process->mutex); | |
289 | ||
290 | process->mm = thread->mm; | |
291 | ||
292 | /* register notifier */ | |
293 | process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; | |
294 | err = __mmu_notifier_register(&process->mmu_notifier, process->mm); | |
295 | if (err) | |
296 | goto err_mmu_notifier; | |
297 | ||
298 | hash_add_rcu(kfd_processes_table, &process->kfd_processes, | |
299 | (uintptr_t)process->mm); | |
300 | ||
301 | process->lead_thread = thread->group_leader; | |
302 | ||
303 | process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE; | |
304 | ||
305 | INIT_LIST_HEAD(&process->per_device_data); | |
306 | ||
f3a39818 AL |
307 | kfd_event_init_process(process); |
308 | ||
45102048 BG |
309 | err = pqm_init(&process->pqm, process); |
310 | if (err != 0) | |
311 | goto err_process_pqm_init; | |
312 | ||
dd59239a | 313 | /* init process apertures*/ |
10f1685f | 314 | process->is_32bit_user_mode = in_compat_syscall(); |
dd59239a AS |
315 | if (kfd_init_apertures(process) != 0) |
316 | goto err_init_apretures; | |
317 | ||
19f6d2a6 OG |
318 | return process; |
319 | ||
dd59239a AS |
320 | err_init_apretures: |
321 | pqm_uninit(&process->pqm); | |
45102048 BG |
322 | err_process_pqm_init: |
323 | hash_del_rcu(&process->kfd_processes); | |
324 | synchronize_rcu(); | |
325 | mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm); | |
19f6d2a6 OG |
326 | err_mmu_notifier: |
327 | kfd_pasid_free(process->pasid); | |
328 | err_alloc_pasid: | |
329 | kfree(process->queues); | |
330 | err_alloc_queues: | |
331 | kfree(process); | |
332 | err_alloc_process: | |
333 | return ERR_PTR(err); | |
334 | } | |
335 | ||
336 | struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, | |
093c7d8c | 337 | struct kfd_process *p) |
19f6d2a6 OG |
338 | { |
339 | struct kfd_process_device *pdd = NULL; | |
340 | ||
341 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) | |
342 | if (pdd->dev == dev) | |
093c7d8c AS |
343 | break; |
344 | ||
345 | return pdd; | |
346 | } | |
347 | ||
348 | struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, | |
349 | struct kfd_process *p) | |
350 | { | |
351 | struct kfd_process_device *pdd = NULL; | |
352 | ||
353 | pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); | |
354 | if (pdd != NULL) { | |
355 | pdd->dev = dev; | |
356 | INIT_LIST_HEAD(&pdd->qpd.queues_list); | |
357 | INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); | |
358 | pdd->qpd.dqm = dev->dqm; | |
a82918f1 | 359 | pdd->reset_wavefronts = false; |
093c7d8c | 360 | list_add(&pdd->per_device_list, &p->per_device_data); |
19f6d2a6 OG |
361 | } |
362 | ||
363 | return pdd; | |
364 | } | |
365 | ||
366 | /* | |
367 | * Direct the IOMMU to bind the process (specifically the pasid->mm) | |
368 | * to the device. | |
369 | * Unbinding occurs when the process dies or the device is removed. | |
370 | * | |
371 | * Assumes that the process lock is held. | |
372 | */ | |
373 | struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, | |
374 | struct kfd_process *p) | |
375 | { | |
093c7d8c | 376 | struct kfd_process_device *pdd; |
b17f068a | 377 | int err; |
19f6d2a6 | 378 | |
093c7d8c AS |
379 | pdd = kfd_get_process_device_data(dev, p); |
380 | if (!pdd) { | |
381 | pr_err("Process device data doesn't exist\n"); | |
19f6d2a6 | 382 | return ERR_PTR(-ENOMEM); |
093c7d8c | 383 | } |
19f6d2a6 OG |
384 | |
385 | if (pdd->bound) | |
386 | return pdd; | |
387 | ||
b17f068a OG |
388 | err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); |
389 | if (err < 0) | |
390 | return ERR_PTR(err); | |
391 | ||
19f6d2a6 OG |
392 | pdd->bound = true; |
393 | ||
394 | return pdd; | |
395 | } | |
396 | ||
397 | void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) | |
398 | { | |
399 | struct kfd_process *p; | |
400 | struct kfd_process_device *pdd; | |
401 | int idx, i; | |
402 | ||
403 | BUG_ON(dev == NULL); | |
404 | ||
405 | idx = srcu_read_lock(&kfd_processes_srcu); | |
406 | ||
407 | hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes) | |
408 | if (p->pasid == pasid) | |
409 | break; | |
410 | ||
411 | srcu_read_unlock(&kfd_processes_srcu, idx); | |
412 | ||
413 | BUG_ON(p->pasid != pasid); | |
414 | ||
415 | mutex_lock(&p->mutex); | |
416 | ||
c3447e81 BG |
417 | if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) |
418 | kfd_dbgmgr_destroy(dev->dbgmgr); | |
419 | ||
45102048 BG |
420 | pqm_uninit(&p->pqm); |
421 | ||
093c7d8c | 422 | pdd = kfd_get_process_device_data(dev, p); |
a0f67441 MS |
423 | |
424 | if (!pdd) { | |
425 | mutex_unlock(&p->mutex); | |
426 | return; | |
427 | } | |
428 | ||
a82918f1 BG |
429 | if (pdd->reset_wavefronts) { |
430 | dbgdev_wave_reset_wavefronts(pdd->dev, p); | |
431 | pdd->reset_wavefronts = false; | |
432 | } | |
19f6d2a6 OG |
433 | |
434 | /* | |
435 | * Just mark pdd as unbound, because we still need it to call | |
436 | * amd_iommu_unbind_pasid() in when the process exits. | |
437 | * We don't call amd_iommu_unbind_pasid() here | |
438 | * because the IOMMU called us. | |
439 | */ | |
a0f67441 | 440 | pdd->bound = false; |
19f6d2a6 OG |
441 | |
442 | mutex_unlock(&p->mutex); | |
443 | } | |
444 | ||
445 | struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) | |
446 | { | |
447 | return list_first_entry(&p->per_device_data, | |
448 | struct kfd_process_device, | |
449 | per_device_list); | |
450 | } | |
451 | ||
452 | struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, | |
453 | struct kfd_process_device *pdd) | |
454 | { | |
455 | if (list_is_last(&pdd->per_device_list, &p->per_device_data)) | |
456 | return NULL; | |
457 | return list_next_entry(pdd, per_device_list); | |
458 | } | |
459 | ||
460 | bool kfd_has_process_device_data(struct kfd_process *p) | |
461 | { | |
462 | return !(list_empty(&p->per_device_data)); | |
463 | } | |
f3a39818 AL |
464 | |
465 | /* This returns with process->mutex locked. */ | |
466 | struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) | |
467 | { | |
468 | struct kfd_process *p; | |
469 | unsigned int temp; | |
470 | ||
471 | int idx = srcu_read_lock(&kfd_processes_srcu); | |
472 | ||
473 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | |
474 | if (p->pasid == pasid) { | |
475 | mutex_lock(&p->mutex); | |
476 | break; | |
477 | } | |
478 | } | |
479 | ||
480 | srcu_read_unlock(&kfd_processes_srcu, idx); | |
481 | ||
482 | return p; | |
483 | } |