// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mm_types.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
#include <linux/mman.h>
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
#include "kfd_iommu.h"
#include <linux/device.h>

/*
 * Wrapper around wait_queue_entry_t
 */
struct kfd_event_waiter {
	wait_queue_entry_t wait;
	struct kfd_event *event; /* Event to wait for */
	bool activated;		 /* Becomes true when event is signaled */
};

/*
 * Each signal event needs a 64-bit signal slot where the signaler will write
 * a 1 before sending an interrupt. (This is needed because some interrupts
 * do not contain enough spare data bits to identify an event.)
 * We get whole pages and map them to the process VA.
 * Individual signal events use their event_id as slot index.
 */
struct kfd_signal_page {
	uint64_t *kernel_address;
	uint64_t __user *user_address;
	bool need_to_free_pages;
};
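
/*
 * A signal page comes into existence in one of two ways in this file:
 * allocate_signal_page() gets the backing store from __get_free_pages()
 * and sets need_to_free_pages so that shutdown_signal_page() frees it,
 * while kfd_event_page_set() (the CRIU restore path) installs a
 * caller-provided mapping and leaves the flag false, so those pages are
 * owned and freed elsewhere.
 */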

static uint64_t *page_slots(struct kfd_signal_page *page)
{
	return page->kernel_address;
}
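
/*
 * Each signal slot is one uint64_t, i.e. 8 bytes wide; that is where the
 * "KFD_SIGNAL_EVENT_LIMIT * 8" sizing used throughout this file comes from.
 */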

static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
{
	void *backing_store;
	struct kfd_signal_page *page;

	page = kzalloc(sizeof(*page), GFP_KERNEL);
	if (!page)
		return NULL;

	backing_store = (void *) __get_free_pages(GFP_KERNEL,
					get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
	if (!backing_store)
		goto fail_alloc_signal_store;

	/* Initialize all events to unsignaled */
	memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
	       KFD_SIGNAL_EVENT_LIMIT * 8);

	page->kernel_address = backing_store;
	page->need_to_free_pages = true;
	pr_debug("Allocated new event signal page at %p, for process %p\n",
		 page, p);

	return page;

fail_alloc_signal_store:
	kfree(page);
	return NULL;
}

static int allocate_event_notification_slot(struct kfd_process *p,
					    struct kfd_event *ev,
					    const int *restore_id)
{
	int id;

	if (!p->signal_page) {
		p->signal_page = allocate_signal_page(p);
		if (!p->signal_page)
			return -ENOMEM;
		/* Oldest user mode expects 256 event slots */
		p->signal_mapped_size = 256*8;
	}

	if (restore_id) {
		id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
			       GFP_KERNEL);
	} else {
		/*
		 * Compatibility with old user mode: Only use signal slots
		 * user mode has mapped, may be less than
		 * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
		 * of the event limit without breaking user mode.
		 */
		id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
			       GFP_KERNEL);
	}
	if (id < 0)
		return id;

	ev->event_id = id;
	page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;

	return 0;
}
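
/*
 * For signal events the IDR id does double duty: it is both the event ID
 * reported to user mode and the index of the event's 64-bit slot in the
 * signal page. The restore_id path pins one specific id (idr_alloc() with
 * a single-id range) so that CRIU can recreate events with their original
 * IDs.
 */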

/*
 * Assumes that p->event_mutex or rcu_read_lock is held and of course that p
 * is not going away.
 */
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{
	return idr_find(&p->event_idr, id);
}

/**
 * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID
 * @p: Pointer to struct kfd_process
 * @id: ID to look up
 * @bits: Number of valid bits in @id
 *
 * Finds the first signaled event with a matching partial ID. If no
 * matching signaled event is found, returns NULL. In that case the
 * caller should assume that the partial ID is invalid and do an
 * exhaustive search of all signaled events.
 *
 * If multiple events with the same partial ID signal at the same
 * time, they will be found one interrupt at a time, not necessarily
 * in the same order the interrupts occurred. As long as the number of
 * interrupts is correct, all signaled events will be seen by the
 * driver.
 */
static struct kfd_event *lookup_signaled_event_by_partial_id(
	struct kfd_process *p, uint32_t id, uint32_t bits)
{
	struct kfd_event *ev;

	if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT)
		return NULL;

	/* Fast path for the common case that @id is not a partial ID
	 * and we only need a single lookup.
	 */
	if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) {
		if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
			return NULL;

		return idr_find(&p->event_idr, id);
	}

	/* General case for partial IDs: Iterate over all matching IDs
	 * and find the first one that has signaled.
	 */
	for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) {
		if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
			continue;

		ev = idr_find(&p->event_idr, id);
	}

	return ev;
}
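
/*
 * Worked example (using 4096 as an assumed value of KFD_SIGNAL_EVENT_LIMIT
 * for illustration; the limit is a power of two): an interrupt carrying 12
 * or more valid ID bits identifies the slot uniquely and takes the fast
 * path above. With only 8 valid bits, the candidates for a partial ID "id"
 * are id, id + 256, id + 512, ..., and the loop probes each candidate slot
 * until it finds one that has signaled.
 */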

static int create_signal_event(struct file *devkfd, struct kfd_process *p,
			       struct kfd_event *ev, const int *restore_id)
{
	int ret;

	if (p->signal_mapped_size &&
	    p->signal_event_count == p->signal_mapped_size / 8) {
		if (!p->signal_event_limit_reached) {
			pr_debug("Signal event wasn't created because limit was reached\n");
			p->signal_event_limit_reached = true;
		}
		return -ENOSPC;
	}

	ret = allocate_event_notification_slot(p, ev, restore_id);
	if (ret) {
		pr_warn("Signal event wasn't created because out of kernel memory\n");
		return ret;
	}

	p->signal_event_count++;

	ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
	pr_debug("Signal event number %zu created with id %d, address %p\n",
		 p->signal_event_count, ev->event_id,
		 ev->user_signal_address);

	return 0;
}

static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
{
	int id;

	if (restore_id)
		id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
			       GFP_KERNEL);
	else
		/* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
		 * intentional integer overflow to -1 without a compiler
		 * warning. idr_alloc treats a negative value as "maximum
		 * signed integer".
		 */
		id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
			       (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
			       GFP_KERNEL);

	if (id < 0)
		return id;
	ev->event_id = id;

	return 0;
}
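
/*
 * The event ID space is split in two: signal and debug events get low IDs
 * (below KFD_SIGNAL_EVENT_LIMIT) that double as signal slot indices, while
 * all other event types are allocated from KFD_FIRST_NONSIGNAL_EVENT_ID
 * upward, so they can never collide with a signal slot.
 */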

int kfd_event_init_process(struct kfd_process *p)
{
	int id;

	mutex_init(&p->event_mutex);
	idr_init(&p->event_idr);
	p->signal_page = NULL;
	p->signal_event_count = 1;
	/* Allocate event ID 0. It is used for a fast path to ignore bogus
	 * events that are sent by the CP without a context ID
	 */
	id = idr_alloc(&p->event_idr, NULL, 0, 1, GFP_KERNEL);
	if (id < 0) {
		idr_destroy(&p->event_idr);
		mutex_destroy(&p->event_mutex);
		return id;
	}
	return 0;
}

static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
{
	struct kfd_event_waiter *waiter;

	/* Wake up pending waiters. They will return failure */
	spin_lock(&ev->lock);
	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
		WRITE_ONCE(waiter->event, NULL);
	wake_up_all(&ev->wq);
	spin_unlock(&ev->lock);

	if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
	    ev->type == KFD_EVENT_TYPE_DEBUG)
		p->signal_event_count--;

	idr_remove(&p->event_idr, ev->event_id);
	kfree_rcu(ev, rcu);
}

static void destroy_events(struct kfd_process *p)
{
	struct kfd_event *ev;
	uint32_t id;

	idr_for_each_entry(&p->event_idr, ev, id)
		if (ev)
			destroy_event(p, ev);
	idr_destroy(&p->event_idr);
	mutex_destroy(&p->event_mutex);
}

/*
 * We assume that the process is being destroyed and there is no need to
 * unmap the pages or keep bookkeeping data in order.
 */
static void shutdown_signal_page(struct kfd_process *p)
{
	struct kfd_signal_page *page = p->signal_page;

	if (page) {
		if (page->need_to_free_pages)
			free_pages((unsigned long)page->kernel_address,
				   get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
		kfree(page);
	}
}

void kfd_event_free_process(struct kfd_process *p)
{
	destroy_events(p);
	shutdown_signal_page(p);
}

static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
{
	return ev->type == KFD_EVENT_TYPE_SIGNAL ||
	       ev->type == KFD_EVENT_TYPE_DEBUG;
}

static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
{
	return ev->type == KFD_EVENT_TYPE_SIGNAL;
}

static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
			      uint64_t size, uint64_t user_handle)
{
	struct kfd_signal_page *page;

	if (p->signal_page)
		return -EBUSY;

	page = kzalloc(sizeof(*page), GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* Initialize all events to unsignaled */
	memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT,
	       KFD_SIGNAL_EVENT_LIMIT * 8);

	page->kernel_address = kernel_address;

	p->signal_page = page;
	p->signal_mapped_size = size;
	p->signal_handle = user_handle;
	return 0;
}

int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
{
	struct kfd_dev *kfd;
	struct kfd_process_device *pdd;
	void *mem, *kern_addr;
	uint64_t size;
	int err = 0;

	if (p->signal_page) {
		pr_err("Event page is already set\n");
		return -EINVAL;
	}

	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(event_page_offset));
	if (!pdd) {
		pr_err("Getting device by id failed in %s\n", __func__);
		return -EINVAL;
	}
	kfd = pdd->dev;

	pdd = kfd_bind_process_to_device(kfd, p);
	if (IS_ERR(pdd))
		return PTR_ERR(pdd);

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(event_page_offset));
	if (!mem) {
		pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
		return -EINVAL;
	}

	err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kern_addr, &size);
	if (err) {
		pr_err("Failed to map event page to kernel\n");
		return err;
	}

	err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
	if (err) {
		pr_err("Failed to set event page\n");
		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
		return err;
	}
	return err;
}
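
/*
 * On the CRIU restore path the checkpointed event page offset encodes both
 * a GPU ID and an IDR handle for the buffer object that backed the page
 * (see GET_GPU_ID() and GET_IDR_HANDLE() above). kfd_kmap_event_page()
 * decodes the two halves, maps the BO into the kernel address space, and
 * installs it as the process's signal page without taking ownership of the
 * pages (need_to_free_pages stays false).
 */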

int kfd_event_create(struct file *devkfd, struct kfd_process *p,
		     uint32_t event_type, bool auto_reset, uint32_t node_id,
		     uint32_t *event_id, uint32_t *event_trigger_data,
		     uint64_t *event_page_offset, uint32_t *event_slot_index)
{
	int ret = 0;
	struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL);

	if (!ev)
		return -ENOMEM;

	ev->type = event_type;
	ev->auto_reset = auto_reset;
	ev->signaled = false;

	spin_lock_init(&ev->lock);
	init_waitqueue_head(&ev->wq);

	*event_page_offset = 0;

	mutex_lock(&p->event_mutex);

	switch (event_type) {
	case KFD_EVENT_TYPE_SIGNAL:
	case KFD_EVENT_TYPE_DEBUG:
		ret = create_signal_event(devkfd, p, ev, NULL);
		if (!ret) {
			*event_page_offset = KFD_MMAP_TYPE_EVENTS;
			*event_slot_index = ev->event_id;
		}
		break;
	default:
		ret = create_other_event(p, ev, NULL);
		break;
	}

	if (!ret) {
		*event_id = ev->event_id;
		*event_trigger_data = ev->event_id;
	} else {
		kfree(ev);
	}

	mutex_unlock(&p->event_mutex);

	return ret;
}
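
/*
 * For signal events, user mode is expected to mmap() the signal page
 * through the returned event_page_offset; a GPU signaler then writes to
 * slot event_slot_index before raising an interrupt, while CPU-side
 * signaling goes through kfd_set_event(). A rough, illustrative sketch of
 * the user-side mapping (kfd_fd and size stand for the opened /dev/kfd
 * descriptor and the mapped size; this snippet is not part of this file):
 *
 *	uint64_t *slots = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			       MAP_SHARED, kfd_fd, event_page_offset);
 *
 * kfd_event_mmap() below implements the kernel side of that mapping.
 */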

int kfd_criu_restore_event(struct file *devkfd,
			   struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	struct kfd_criu_event_priv_data *ev_priv;
	struct kfd_event *ev = NULL;
	int ret = 0;

	ev_priv = kmalloc(sizeof(*ev_priv), GFP_KERNEL);
	if (!ev_priv)
		return -ENOMEM;

	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
	if (!ev) {
		ret = -ENOMEM;
		goto exit;
	}

	if (*priv_data_offset + sizeof(*ev_priv) > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	ret = copy_from_user(ev_priv, user_priv_ptr + *priv_data_offset, sizeof(*ev_priv));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}
	*priv_data_offset += sizeof(*ev_priv);

	if (ev_priv->user_handle) {
		ret = kfd_kmap_event_page(p, ev_priv->user_handle);
		if (ret)
			goto exit;
	}

	ev->type = ev_priv->type;
	ev->auto_reset = ev_priv->auto_reset;
	ev->signaled = ev_priv->signaled;

	spin_lock_init(&ev->lock);
	init_waitqueue_head(&ev->wq);

	mutex_lock(&p->event_mutex);
	switch (ev->type) {
	case KFD_EVENT_TYPE_SIGNAL:
	case KFD_EVENT_TYPE_DEBUG:
		ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
		break;
	case KFD_EVENT_TYPE_MEMORY:
		memcpy(&ev->memory_exception_data,
		       &ev_priv->memory_exception_data,
		       sizeof(struct kfd_hsa_memory_exception_data));

		ret = create_other_event(p, ev, &ev_priv->event_id);
		break;
	case KFD_EVENT_TYPE_HW_EXCEPTION:
		memcpy(&ev->hw_exception_data,
		       &ev_priv->hw_exception_data,
		       sizeof(struct kfd_hsa_hw_exception_data));

		ret = create_other_event(p, ev, &ev_priv->event_id);
		break;
	}
	mutex_unlock(&p->event_mutex);

exit:
	if (ret)
		kfree(ev);

	kfree(ev_priv);

	return ret;
}

int kfd_criu_checkpoint_events(struct kfd_process *p,
			       uint8_t __user *user_priv_data,
			       uint64_t *priv_data_offset)
{
	struct kfd_criu_event_priv_data *ev_privs;
	int i = 0;
	int ret = 0;
	struct kfd_event *ev;
	uint32_t ev_id;

	uint32_t num_events = kfd_get_num_events(p);

	if (!num_events)
		return 0;

	ev_privs = kvzalloc(num_events * sizeof(*ev_privs), GFP_KERNEL);
	if (!ev_privs)
		return -ENOMEM;

	idr_for_each_entry(&p->event_idr, ev, ev_id) {
		struct kfd_criu_event_priv_data *ev_priv;

		/*
		 * Currently all events have the same size of private_data,
		 * but the current ioctls and the CRIU plugin support
		 * private_data of variable sizes
		 */
		ev_priv = &ev_privs[i];

		ev_priv->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;

		/* We store the user_handle with the first event */
		if (i == 0 && p->signal_page)
			ev_priv->user_handle = p->signal_handle;

		ev_priv->event_id = ev->event_id;
		ev_priv->auto_reset = ev->auto_reset;
		ev_priv->type = ev->type;
		ev_priv->signaled = ev->signaled;

		if (ev_priv->type == KFD_EVENT_TYPE_MEMORY)
			memcpy(&ev_priv->memory_exception_data,
			       &ev->memory_exception_data,
			       sizeof(struct kfd_hsa_memory_exception_data));
		else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION)
			memcpy(&ev_priv->hw_exception_data,
			       &ev->hw_exception_data,
			       sizeof(struct kfd_hsa_hw_exception_data));

		pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
			 i,
			 ev_priv->event_id,
			 ev_priv->auto_reset,
			 ev_priv->type,
			 ev_priv->signaled);
		i++;
	}

	ret = copy_to_user(user_priv_data + *priv_data_offset,
			   ev_privs, num_events * sizeof(*ev_privs));
	if (ret) {
		pr_err("Failed to copy events priv to user\n");
		ret = -EFAULT;
	}

	*priv_data_offset += num_events * sizeof(*ev_privs);

	kvfree(ev_privs);
	return ret;
}

int kfd_get_num_events(struct kfd_process *p)
{
	struct kfd_event *ev;
	uint32_t id;
	u32 num_events = 0;

	idr_for_each_entry(&p->event_idr, ev, id)
		num_events++;

	return num_events;
}

/* Assumes that p is current. */
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
{
	struct kfd_event *ev;
	int ret = 0;

	mutex_lock(&p->event_mutex);

	ev = lookup_event_by_id(p, event_id);

	if (ev)
		destroy_event(p, ev);
	else
		ret = -EINVAL;

	mutex_unlock(&p->event_mutex);
	return ret;
}

static void set_event(struct kfd_event *ev)
{
	struct kfd_event_waiter *waiter;

	/* Auto reset if the list is non-empty and we're waking
	 * someone. waitqueue_active is safe here because we're
	 * protected by the ev->lock, which is also held when
	 * updating the wait queues in kfd_wait_on_events.
	 */
	ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);

	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
		WRITE_ONCE(waiter->activated, true);

	wake_up_all(&ev->wq);
}

/* Assumes that p is current. */
int kfd_set_event(struct kfd_process *p, uint32_t event_id)
{
	int ret = 0;
	struct kfd_event *ev;

	rcu_read_lock();

	ev = lookup_event_by_id(p, event_id);
	if (!ev) {
		ret = -EINVAL;
		goto unlock_rcu;
	}
	spin_lock(&ev->lock);

	if (event_can_be_cpu_signaled(ev))
		set_event(ev);
	else
		ret = -EINVAL;

	spin_unlock(&ev->lock);
unlock_rcu:
	rcu_read_unlock();
	return ret;
}

static void reset_event(struct kfd_event *ev)
{
	ev->signaled = false;
}

/* Assumes that p is current. */
int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
{
	int ret = 0;
	struct kfd_event *ev;

	rcu_read_lock();

	ev = lookup_event_by_id(p, event_id);
	if (!ev) {
		ret = -EINVAL;
		goto unlock_rcu;
	}
	spin_lock(&ev->lock);

	if (event_can_be_cpu_signaled(ev))
		reset_event(ev);
	else
		ret = -EINVAL;

	spin_unlock(&ev->lock);
unlock_rcu:
	rcu_read_unlock();
	return ret;
}

static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{
	WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
}

static void set_event_from_interrupt(struct kfd_process *p,
				     struct kfd_event *ev)
{
	if (ev && event_can_be_gpu_signaled(ev)) {
		acknowledge_signal(p, ev);
		spin_lock(&ev->lock);
		set_event(ev);
		spin_unlock(&ev->lock);
	}
}

void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
				uint32_t valid_id_bits)
{
	struct kfd_event *ev = NULL;

	/*
	 * Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);

	if (!p)
		return; /* Presumably process exited. */

	rcu_read_lock();

	if (valid_id_bits)
		ev = lookup_signaled_event_by_partial_id(p, partial_id,
							 valid_id_bits);
	if (ev) {
		set_event_from_interrupt(p, ev);
	} else if (p->signal_page) {
		/*
		 * Partial ID lookup failed. Assume that the event ID
		 * in the interrupt payload was invalid and do an
		 * exhaustive search of signaled events.
		 */
		uint64_t *slots = page_slots(p->signal_page);
		uint32_t id;

		if (valid_id_bits)
			pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
					     partial_id, valid_id_bits);

		if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) {
			/* With relatively few events, it's faster to
			 * iterate over the event IDR
			 */
			idr_for_each_entry(&p->event_idr, ev, id) {
				if (id >= KFD_SIGNAL_EVENT_LIMIT)
					break;

				if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
					set_event_from_interrupt(p, ev);
			}
		} else {
			/* With relatively many events, it's faster to
			 * iterate over the signal slots and lookup
			 * only signaled events from the IDR.
			 */
			for (id = 1; id < KFD_SIGNAL_EVENT_LIMIT; id++)
				if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
					ev = lookup_event_by_id(p, id);
					set_event_from_interrupt(p, ev);
				}
		}
	}

	rcu_read_unlock();
	kfd_unref_process(p);
}

static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
{
	struct kfd_event_waiter *event_waiters;
	uint32_t i;

	event_waiters = kmalloc_array(num_events,
				      sizeof(struct kfd_event_waiter),
				      GFP_KERNEL);
	if (!event_waiters)
		return NULL;

	for (i = 0; i < num_events; i++) {
		init_wait(&event_waiters[i].wait);
		event_waiters[i].activated = false;
	}

	return event_waiters;
}

static int init_event_waiter(struct kfd_process *p,
			     struct kfd_event_waiter *waiter,
			     uint32_t event_id)
{
	struct kfd_event *ev = lookup_event_by_id(p, event_id);

	if (!ev)
		return -EINVAL;

	spin_lock(&ev->lock);
	waiter->event = ev;
	waiter->activated = ev->signaled;
	ev->signaled = ev->signaled && !ev->auto_reset;
	if (!waiter->activated)
		add_wait_queue(&ev->wq, &waiter->wait);
	spin_unlock(&ev->lock);

	return 0;
}
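
/*
 * If the event is already signaled when the waiter is initialized, the
 * waiter starts out activated and is never added to the wait queue; for
 * auto-reset events the "ev->signaled = ev->signaled && !ev->auto_reset"
 * assignment consumes the signal at the same time. free_waiters() can undo
 * that consumption if the wait is later restarted (see the -ERESTARTSYS
 * handling in kfd_wait_on_events()).
 */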

/* test_event_condition - Test condition of events being waited for
 * @all: Return completion only if all events have signaled
 * @num_events: Number of events to wait for
 * @event_waiters: Array of event waiters, one per event
 *
 * Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have
 * signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all)
 * events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of
 * the events have been destroyed.
 */
static uint32_t test_event_condition(bool all, uint32_t num_events,
				     struct kfd_event_waiter *event_waiters)
{
	uint32_t i;
	uint32_t activated_count = 0;

	for (i = 0; i < num_events; i++) {
		if (!READ_ONCE(event_waiters[i].event))
			return KFD_IOC_WAIT_RESULT_FAIL;

		if (READ_ONCE(event_waiters[i].activated)) {
			if (!all)
				return KFD_IOC_WAIT_RESULT_COMPLETE;

			activated_count++;
		}
	}

	return activated_count == num_events ?
	       KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT;
}

/*
 * Copy event specific data, if defined.
 * Currently only memory exception events have additional data to copy to user
 */
static int copy_signaled_event_data(uint32_t num_events,
				    struct kfd_event_waiter *event_waiters,
				    struct kfd_event_data __user *data)
{
	struct kfd_hsa_memory_exception_data *src;
	struct kfd_hsa_memory_exception_data __user *dst;
	struct kfd_event_waiter *waiter;
	struct kfd_event *event;
	uint32_t i;

	for (i = 0; i < num_events; i++) {
		waiter = &event_waiters[i];
		event = waiter->event;
		if (!event)
			return -EINVAL; /* event was destroyed */
		if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
			dst = &data[i].memory_exception_data;
			src = &event->memory_exception_data;
			if (copy_to_user(dst, src,
					 sizeof(struct kfd_hsa_memory_exception_data)))
				return -EFAULT;
		}
	}

	return 0;
}

static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
{
	if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
		return 0;

	if (user_timeout_ms == KFD_EVENT_TIMEOUT_INFINITE)
		return MAX_SCHEDULE_TIMEOUT;

	/*
	 * msecs_to_jiffies interprets all values above 2^31-1 as infinite,
	 * but we consider them finite.
	 * This hack is wrong, but nobody is likely to notice.
	 */
	user_timeout_ms = min_t(uint32_t, user_timeout_ms, 0x7FFFFFFF);

	return msecs_to_jiffies(user_timeout_ms) + 1;
}
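
/*
 * Worked example (assuming the usual uapi values of 0 for
 * KFD_EVENT_TIMEOUT_IMMEDIATE and 0xFFFFFFFF for KFD_EVENT_TIMEOUT_INFINITE):
 * a request of 0 ms polls the condition once without sleeping, 0xFFFFFFFF
 * sleeps until woken, and e.g. 1000 ms becomes msecs_to_jiffies(1000) + 1
 * jiffies, the extra jiffy ensuring the wait lasts at least the requested
 * time even if the first tick is partial.
 */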

static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
			 bool undo_auto_reset)
{
	uint32_t i;

	for (i = 0; i < num_events; i++)
		if (waiters[i].event) {
			spin_lock(&waiters[i].event->lock);
			remove_wait_queue(&waiters[i].event->wq,
					  &waiters[i].wait);
			if (undo_auto_reset && waiters[i].activated &&
			    waiters[i].event && waiters[i].event->auto_reset)
				set_event(waiters[i].event);
			spin_unlock(&waiters[i].event->lock);
		}

	kfree(waiters);
}

int kfd_wait_on_events(struct kfd_process *p,
		       uint32_t num_events, void __user *data,
		       bool all, uint32_t *user_timeout_ms,
		       uint32_t *wait_result)
{
	struct kfd_event_data __user *events =
			(struct kfd_event_data __user *) data;
	uint32_t i;
	int ret = 0;

	struct kfd_event_waiter *event_waiters = NULL;
	long timeout = user_timeout_to_jiffies(*user_timeout_ms);

	event_waiters = alloc_event_waiters(num_events);
	if (!event_waiters) {
		ret = -ENOMEM;
		goto out;
	}

	/* Use p->event_mutex here to protect against concurrent creation and
	 * destruction of events while we initialize event_waiters.
	 */
	mutex_lock(&p->event_mutex);

	for (i = 0; i < num_events; i++) {
		struct kfd_event_data event_data;

		if (copy_from_user(&event_data, &events[i],
				   sizeof(struct kfd_event_data))) {
			ret = -EFAULT;
			goto out_unlock;
		}

		ret = init_event_waiter(p, &event_waiters[i],
					event_data.event_id);
		if (ret)
			goto out_unlock;
	}

	/* Check condition once. */
	*wait_result = test_event_condition(all, num_events, event_waiters);
	if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) {
		ret = copy_signaled_event_data(num_events,
					       event_waiters, events);
		goto out_unlock;
	} else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) {
		/* This should not happen. Events shouldn't be
		 * destroyed while we're holding the event_mutex
		 */
		goto out_unlock;
	}

	mutex_unlock(&p->event_mutex);

	while (true) {
		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
			    *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
				*user_timeout_ms = jiffies_to_msecs(
					max(0l, timeout-1));
			break;
		}

		/* Set task state to interruptible sleep before
		 * checking wake-up conditions. A concurrent wake-up
		 * will put the task back into runnable state. In that
		 * case schedule_timeout will not put the task to
		 * sleep and we'll get a chance to re-check the
		 * updated conditions almost immediately. Otherwise,
		 * this race condition would lead to a soft hang or a
		 * very long sleep.
		 */
		set_current_state(TASK_INTERRUPTIBLE);

		*wait_result = test_event_condition(all, num_events,
						    event_waiters);
		if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT)
			break;

		if (timeout <= 0)
			break;

		timeout = schedule_timeout(timeout);
	}
	__set_current_state(TASK_RUNNING);

	mutex_lock(&p->event_mutex);
	/* copy_signaled_event_data may sleep. So this has to happen
	 * after the task state is set back to RUNNING.
	 *
	 * The event may also have been destroyed after signaling. So
	 * copy_signaled_event_data also must confirm that the event
	 * still exists. Therefore this must be under the p->event_mutex
	 * which is also held when events are destroyed.
	 */
	if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
		ret = copy_signaled_event_data(num_events,
					       event_waiters, events);

out_unlock:
	free_waiters(num_events, event_waiters, ret == -ERESTARTSYS);
	mutex_unlock(&p->event_mutex);
out:
	if (ret)
		*wait_result = KFD_IOC_WAIT_RESULT_FAIL;
	else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL)
		ret = -EIO;

	return ret;
}
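
/*
 * On -ERESTARTSYS the ioctl will be restarted by the kernel after signal
 * handling, so the remaining timeout is written back to *user_timeout_ms
 * and free_waiters() is asked to undo any auto-reset signals this aborted
 * wait already consumed; otherwise the restarted wait could block on
 * events that had in fact fired.
 */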

int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
{
	unsigned long pfn;
	struct kfd_signal_page *page;
	int ret;

	/* check required size doesn't exceed the allocated size */
	if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) <
	    get_order(vma->vm_end - vma->vm_start)) {
		pr_err("Event page mmap requested illegal size\n");
		return -EINVAL;
	}

	page = p->signal_page;
	if (!page) {
		/* Probably KFD bug, but mmap is user-accessible. */
		pr_debug("Signal page could not be found\n");
		return -EINVAL;
	}

	pfn = __pa(page->kernel_address);
	pfn >>= PAGE_SHIFT;

	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
		     | VM_DONTDUMP | VM_PFNMAP);

	pr_debug("Mapping signal page\n");
	pr_debug("     start user address  == 0x%08lx\n", vma->vm_start);
	pr_debug("     end user address    == 0x%08lx\n", vma->vm_end);
	pr_debug("     pfn                 == 0x%016lX\n", pfn);
	pr_debug("     vm_flags            == 0x%08lX\n", vma->vm_flags);
	pr_debug("     size                == 0x%08lX\n",
		 vma->vm_end - vma->vm_start);

	page->user_address = (uint64_t __user *)vma->vm_start;

	/* mapping the page to user process */
	ret = remap_pfn_range(vma, vma->vm_start, pfn,
			      vma->vm_end - vma->vm_start, vma->vm_page_prot);
	if (!ret)
		p->signal_mapped_size = vma->vm_end - vma->vm_start;

	return ret;
}

/*
 * Assumes that p is not going away.
 */
static void lookup_events_by_type_and_signal(struct kfd_process *p,
					     int type, void *event_data)
{
	struct kfd_hsa_memory_exception_data *ev_data;
	struct kfd_event *ev;
	uint32_t id;
	bool send_signal = true;

	ev_data = (struct kfd_hsa_memory_exception_data *) event_data;

	rcu_read_lock();

	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
	idr_for_each_entry_continue(&p->event_idr, ev, id)
		if (ev->type == type) {
			send_signal = false;
			dev_dbg(kfd_device,
				"Event found: id %X type %d",
				ev->event_id, ev->type);
			spin_lock(&ev->lock);
			set_event(ev);
			if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
				ev->memory_exception_data = *ev_data;
			spin_unlock(&ev->lock);
		}

	if (type == KFD_EVENT_TYPE_MEMORY) {
		dev_warn(kfd_device,
			 "Sending SIGSEGV to process %d (pasid 0x%x)",
			 p->lead_thread->pid, p->pasid);
		send_sig(SIGSEGV, p->lead_thread, 0);
	}

	/* Send SIGTERM if no event of type "type" has been found */
	if (send_signal) {
		if (send_sigterm) {
			dev_warn(kfd_device,
				 "Sending SIGTERM to process %d (pasid 0x%x)",
				 p->lead_thread->pid, p->pasid);
			send_sig(SIGTERM, p->lead_thread, 0);
		} else {
			dev_err(kfd_device,
				"Process %d (pasid 0x%x) got unhandled exception",
				p->lead_thread->pid, p->pasid);
		}
	}

	rcu_read_unlock();
}

#ifdef KFD_SUPPORT_IOMMU_V2
void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
			    unsigned long address, bool is_write_requested,
			    bool is_execute_requested)
{
	struct kfd_hsa_memory_exception_data memory_exception_data;
	struct vm_area_struct *vma;
	int user_gpu_id;

	/*
	 * Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	struct mm_struct *mm;

	if (!p)
		return; /* Presumably process exited. */

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(p->lead_thread);
	if (!mm) {
		kfd_unref_process(p);
		return; /* Process is exiting */
	}

	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
	if (unlikely(user_gpu_id == -EINVAL)) {
		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
		/* drop the mm and process references taken above */
		mmput(mm);
		kfd_unref_process(p);
		return;
	}
	memset(&memory_exception_data, 0, sizeof(memory_exception_data));

	mmap_read_lock(mm);
	vma = find_vma(mm, address);

	memory_exception_data.gpu_id = user_gpu_id;
	memory_exception_data.va = address;
	/* Set failure reason */
	memory_exception_data.failure.NotPresent = 1;
	memory_exception_data.failure.NoExecute = 0;
	memory_exception_data.failure.ReadOnly = 0;
	if (vma && address >= vma->vm_start) {
		memory_exception_data.failure.NotPresent = 0;

		if (is_write_requested && !(vma->vm_flags & VM_WRITE))
			memory_exception_data.failure.ReadOnly = 1;
		else
			memory_exception_data.failure.ReadOnly = 0;

		if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
			memory_exception_data.failure.NoExecute = 1;
		else
			memory_exception_data.failure.NoExecute = 0;
	}

	mmap_read_unlock(mm);
	mmput(mm);

	pr_debug("notpresent %d, noexecute %d, readonly %d\n",
		 memory_exception_data.failure.NotPresent,
		 memory_exception_data.failure.NoExecute,
		 memory_exception_data.failure.ReadOnly);

	/* Workaround on Raven to not kill the process when memory is freed
	 * before IOMMU is able to finish processing all the excessive PPRs
	 */

	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
	    KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
	    KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0))
		lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
						 &memory_exception_data);

	kfd_unref_process(p);
}
#endif /* KFD_SUPPORT_IOMMU_V2 */

void kfd_signal_hw_exception_event(u32 pasid)
{
	/*
	 * Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);

	if (!p)
		return; /* Presumably process exited. */

	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
	kfd_unref_process(p);
}

void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
			       struct kfd_vm_fault_info *info)
{
	struct kfd_event *ev;
	uint32_t id;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	struct kfd_hsa_memory_exception_data memory_exception_data;
	int user_gpu_id;

	if (!p)
		return; /* Presumably process exited. */

	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
	if (unlikely(user_gpu_id == -EINVAL)) {
		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
		kfd_unref_process(p); /* drop the reference taken above */
		return;
	}

	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
	memory_exception_data.gpu_id = user_gpu_id;
	memory_exception_data.failure.imprecise = true;
	/* Set failure reason */
	if (info) {
		memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
		memory_exception_data.failure.NotPresent =
			info->prot_valid ? 1 : 0;
		memory_exception_data.failure.NoExecute =
			info->prot_exec ? 1 : 0;
		memory_exception_data.failure.ReadOnly =
			info->prot_write ? 1 : 0;
		memory_exception_data.failure.imprecise = 0;
	}

	rcu_read_lock();

	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
	idr_for_each_entry_continue(&p->event_idr, ev, id)
		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
			spin_lock(&ev->lock);
			ev->memory_exception_data = memory_exception_data;
			set_event(ev);
			spin_unlock(&ev->lock);
		}

	rcu_read_unlock();
	kfd_unref_process(p);
}

void kfd_signal_reset_event(struct kfd_dev *dev)
{
	struct kfd_hsa_hw_exception_data hw_exception_data;
	struct kfd_hsa_memory_exception_data memory_exception_data;
	struct kfd_process *p;
	struct kfd_event *ev;
	unsigned int temp;
	uint32_t id, idx;
	int reset_cause = atomic_read(&dev->sram_ecc_flag) ?
			  KFD_HW_EXCEPTION_ECC :
			  KFD_HW_EXCEPTION_GPU_HANG;

	/* Whole gpu reset caused by GPU hang and memory is lost */
	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
	hw_exception_data.memory_lost = 1;
	hw_exception_data.reset_cause = reset_cause;

	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
	memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
	memory_exception_data.failure.imprecise = true;

	idx = srcu_read_lock(&kfd_processes_srcu);
	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);

		if (unlikely(user_gpu_id == -EINVAL)) {
			WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
			continue;
		}

		rcu_read_lock();

		id = KFD_FIRST_NONSIGNAL_EVENT_ID;
		idr_for_each_entry_continue(&p->event_idr, ev, id) {
			if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
				spin_lock(&ev->lock);
				ev->hw_exception_data = hw_exception_data;
				ev->hw_exception_data.gpu_id = user_gpu_id;
				set_event(ev);
				spin_unlock(&ev->lock);
			}
			if (ev->type == KFD_EVENT_TYPE_MEMORY &&
			    reset_cause == KFD_HW_EXCEPTION_ECC) {
				spin_lock(&ev->lock);
				ev->memory_exception_data = memory_exception_data;
				ev->memory_exception_data.gpu_id = user_gpu_id;
				set_event(ev);
				spin_unlock(&ev->lock);
			}
		}

		rcu_read_unlock();
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
{
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	struct kfd_hsa_memory_exception_data memory_exception_data;
	struct kfd_hsa_hw_exception_data hw_exception_data;
	struct kfd_event *ev;
	uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
	int user_gpu_id;

	if (!p)
		return; /* Presumably process exited. */

	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
	if (unlikely(user_gpu_id == -EINVAL)) {
		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
		kfd_unref_process(p); /* drop the reference taken above */
		return;
	}

	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
	hw_exception_data.gpu_id = user_gpu_id;
	hw_exception_data.memory_lost = 1;
	hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;

	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
	memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
	memory_exception_data.gpu_id = user_gpu_id;
	memory_exception_data.failure.imprecise = true;

	rcu_read_lock();

	idr_for_each_entry_continue(&p->event_idr, ev, id) {
		if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
			spin_lock(&ev->lock);
			ev->hw_exception_data = hw_exception_data;
			set_event(ev);
			spin_unlock(&ev->lock);
		}

		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
			spin_lock(&ev->lock);
			ev->memory_exception_data = memory_exception_data;
			set_event(ev);
			spin_unlock(&ev->lock);
		}
	}

	rcu_read_unlock();

	/* user application will handle SIGBUS signal */
	send_sig(SIGBUS, p->lead_thread, 0);

	kfd_unref_process(p);
}