/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
22 | ||
23 | #include <linux/mutex.h> | |
24 | #include <linux/log2.h> | |
25 | #include <linux/sched.h> | |
26 | #include <linux/slab.h> | |
b17f068a | 27 | #include <linux/amd-iommu.h> |
19f6d2a6 OG |
28 | #include <linux/notifier.h> |
29 | struct mm_struct; | |
30 | ||
31 | #include "kfd_priv.h" | |
32 | ||
/*
 * Initial size for the array of queues.
 * The allocated size is doubled each time
 * it is exceeded up to MAX_PROCESS_QUEUES.
 */
#define INITIAL_QUEUE_ARRAY_SIZE 16

/*
 * List of struct kfd_process (field kfd_processes).
 * Unique/indexed by mm_struct*
 */
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

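/*
 * SRCU protects readers of kfd_processes_table (see find_process_by_mm)
 * against concurrent removal; writers hold kfd_processes_mutex and call
 * synchronize_srcu() after hash_del_rcu().
 */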
DEFINE_STATIC_SRCU(kfd_processes_srcu);

static struct workqueue_struct *kfd_process_wq;

struct kfd_process_release_work {
        struct work_struct kfd_work;
        struct kfd_process *p;
};

static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);

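/*
 * Work items queued on kfd_process_wq free kfd_process structures after
 * their final SRCU grace period; a dedicated workqueue lets
 * kfd_process_destroy_wq() flush any still-pending releases before the
 * queue is destroyed.
 */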
void kfd_process_create_wq(void)
{
        if (!kfd_process_wq)
                kfd_process_wq = create_workqueue("kfd_process_wq");
}

void kfd_process_destroy_wq(void)
{
        if (kfd_process_wq) {
                flush_workqueue(kfd_process_wq);
                destroy_workqueue(kfd_process_wq);
                kfd_process_wq = NULL;
        }
}

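/*
 * Create (or look up) the kfd_process for a task. A minimal sketch of
 * the expected call site, assuming the usual character-device open path
 * (as in kfd_chardev.c's open handler):
 *
 *      process = kfd_create_process(current);
 *      if (IS_ERR(process))
 *              return PTR_ERR(process);
 */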
struct kfd_process *kfd_create_process(const struct task_struct *thread)
{
        struct kfd_process *process;

        BUG_ON(!kfd_process_wq);

        if (thread->mm == NULL)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        /* Take mmap_sem because we call __mmu_notifier_register inside */
        down_write(&thread->mm->mmap_sem);

        /*
         * Take the kfd_processes_mutex before starting process creation
         * so there is no window in which two threads of the same process
         * could create two kfd_process structures.
         */
        mutex_lock(&kfd_processes_mutex);

        /* A prior open of /dev/kfd could have already created the process. */
        process = find_process(thread);
        if (process)
                pr_debug("kfd: process already found\n");

        if (!process)
                process = create_process(thread);

        mutex_unlock(&kfd_processes_mutex);

        up_write(&thread->mm->mmap_sem);

        return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
        struct kfd_process *process;

        if (thread->mm == NULL)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        process = find_process(thread);

        return process;
}

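/* Caller must hold the SRCU read lock or kfd_processes_mutex. */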
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
        struct kfd_process *process;

        hash_for_each_possible_rcu(kfd_processes_table, process,
                                        kfd_processes, (uintptr_t)mm)
                if (process->mm == mm)
                        return process;

        return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
        struct kfd_process *p;
        int idx;

        idx = srcu_read_lock(&kfd_processes_srcu);
        p = find_process_by_mm(thread->mm);
        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}

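/*
 * Final process teardown. Runs in workqueue (process) context, so it
 * may sleep: it takes the process mutex and calls into the IOMMU driver
 * to unbind each device's pasid before freeing everything.
 */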
static void kfd_process_wq_release(struct work_struct *work)
{
        struct kfd_process_release_work *my_work;
        struct kfd_process_device *pdd, *temp;
        struct kfd_process *p;

        my_work = (struct kfd_process_release_work *) work;

        p = my_work->p;

        mutex_lock(&p->mutex);

        list_for_each_entry_safe(pdd, temp, &p->per_device_data,
                                        per_device_list) {
                amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
                list_del(&pdd->per_device_list);

                kfree(pdd);
        }

        kfd_pasid_free(p->pasid);

        mutex_unlock(&p->mutex);

        mutex_destroy(&p->mutex);

        kfree(p->queues);

        kfree(p);

        kfree(work);
}

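/*
 * Runs as an SRCU callback once the mmu_notifier grace period has
 * elapsed. Callback context cannot sleep, hence the GFP_ATOMIC
 * allocation and the hand-off of the actual teardown to kfd_process_wq.
 */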
static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
        struct kfd_process_release_work *work;
        struct kfd_process *p;

        BUG_ON(!kfd_process_wq);

        p = container_of(rcu, struct kfd_process, rcu);
        BUG_ON(atomic_read(&p->mm->mm_count) <= 0);

        mmdrop(p->mm);

        work = (struct kfd_process_release_work *)
                kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);

        if (work) {
                INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
                work->p = p;
                queue_work(kfd_process_wq, (struct work_struct *) work);
        }
}

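/*
 * Called by the mmu_notifier core when the process address space is
 * torn down (process exit), with the notifier SRCU read-side held.
 */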
static void kfd_process_notifier_release(struct mmu_notifier *mn,
                                        struct mm_struct *mm)
{
        struct kfd_process *p;

        /*
         * The kfd_process structure cannot be freed here because the
         * mmu_notifier srcu is read-locked.
         */
        p = container_of(mn, struct kfd_process, mmu_notifier);
        BUG_ON(p->mm != mm);

        mutex_lock(&kfd_processes_mutex);
        hash_del_rcu(&p->kfd_processes);
        mutex_unlock(&kfd_processes_mutex);
        synchronize_srcu(&kfd_processes_srcu);

        mutex_lock(&p->mutex);

        /* In case our notifier is called before the IOMMU notifier */
        pqm_uninit(&p->pqm);

        mutex_unlock(&p->mutex);

        /*
         * Because we drop mm_count inside kfd_process_destroy_delayed
         * and because mmu_notifier_unregister also drops mm_count, we
         * need to take an extra reference here.
         */
        atomic_inc(&p->mm->mm_count);
        mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
        mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
        .release = kfd_process_notifier_release,
};

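/*
 * Caller must hold kfd_processes_mutex and the mm's mmap_sem for write
 * (see kfd_create_process); __mmu_notifier_register requires the latter.
 */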
static struct kfd_process *create_process(const struct task_struct *thread)
{
        struct kfd_process *process;
        int err = -ENOMEM;

        process = kzalloc(sizeof(*process), GFP_KERNEL);

        if (!process)
                goto err_alloc_process;

        process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
                                        sizeof(process->queues[0]), GFP_KERNEL);
        if (!process->queues)
                goto err_alloc_queues;

        process->pasid = kfd_pasid_alloc();
        if (process->pasid == 0)
                goto err_alloc_pasid;

        mutex_init(&process->mutex);

        process->mm = thread->mm;

        /* register notifier */
        process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
        err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
        if (err)
                goto err_mmu_notifier;

        hash_add_rcu(kfd_processes_table, &process->kfd_processes,
                        (uintptr_t)process->mm);

        process->lead_thread = thread->group_leader;

        process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;

        INIT_LIST_HEAD(&process->per_device_data);

        err = pqm_init(&process->pqm, process);
        if (err != 0)
                goto err_process_pqm_init;

        return process;

err_process_pqm_init:
        hash_del_rcu(&process->kfd_processes);
        synchronize_rcu();
        mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
        kfd_pasid_free(process->pasid);
err_alloc_pasid:
        kfree(process->queues);
err_alloc_queues:
        kfree(process);
err_alloc_process:
        return ERR_PTR(err);
}

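/*
 * Look up the per-device data for (dev, p); if create_pdd is non-zero,
 * allocate and link a new entry when none exists. Returns NULL on
 * allocation failure. Assumes that the process lock is held.
 */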
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
                                                        struct kfd_process *p,
                                                        int create_pdd)
{
        struct kfd_process_device *pdd = NULL;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                if (pdd->dev == dev)
                        return pdd;

        if (create_pdd) {
                pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
                if (pdd != NULL) {
                        pdd->dev = dev;
                        INIT_LIST_HEAD(&pdd->qpd.queues_list);
                        INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
                        pdd->qpd.dqm = dev->dqm;
                        list_add(&pdd->per_device_list, &p->per_device_data);
                }
        }

        return pdd;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
                                                        struct kfd_process *p)
{
        struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p, 1);
        int err;

        if (pdd == NULL)
                return ERR_PTR(-ENOMEM);

        if (pdd->bound)
                return pdd;

        err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
        if (err < 0)
                return ERR_PTR(err);

        pdd->bound = true;

        return pdd;
}

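/*
 * Called when the IOMMU driver tears down a pasid binding for this
 * device (the callback is registered elsewhere in amdkfd); looks up the
 * owning process by pasid and marks its pdd as unbound.
 */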
void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
{
        struct kfd_process *p;
        struct kfd_process_device *pdd;
        int idx, i;

        BUG_ON(dev == NULL);

        idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes)
                if (p->pasid == pasid)
                        break;

        srcu_read_unlock(&kfd_processes_srcu, idx);

        BUG_ON(p->pasid != pasid);

        mutex_lock(&p->mutex);

        pqm_uninit(&p->pqm);

        pdd = kfd_get_process_device_data(dev, p, 0);

        /*
         * Just mark pdd as unbound, because we still need it to call
         * amd_iommu_unbind_pasid() when the process exits.
         * We don't call amd_iommu_unbind_pasid() here
         * because the IOMMU called us.
         */
        if (pdd)
                pdd->bound = false;

        mutex_unlock(&p->mutex);
}

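/*
 * Iterator helpers for walking a process's per-device data list. A
 * minimal usage sketch (assuming the caller already holds p->mutex;
 * handle_pdd() is a hypothetical placeholder):
 *
 *      struct kfd_process_device *pdd;
 *
 *      if (kfd_has_process_device_data(p))
 *              for (pdd = kfd_get_first_process_device_data(p); pdd;
 *                   pdd = kfd_get_next_process_device_data(p, pdd))
 *                      handle_pdd(pdd);
 *
 * Note that kfd_get_first_process_device_data() must not be called on
 * an empty list, hence the kfd_has_process_device_data() check.
 */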
struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
{
        return list_first_entry(&p->per_device_data,
                                struct kfd_process_device,
                                per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
                                                struct kfd_process_device *pdd)
{
        if (list_is_last(&pdd->per_device_list, &p->per_device_data))
                return NULL;
        return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
        return !(list_empty(&p->per_device_data));
}