// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_CS_FLAGS_SIG_WAIT	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)

static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
static void cs_do_release(struct kref *ref);

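/*
 * hl_sob_reset - reset a h/w sync object (SOB) once its kref drops to zero
 *
 * @ref : kref field of the SOB that reached a zero refcount
 *
 * Serves as the kref release callback for hl_hw_sob and delegates the
 * actual reset to the ASIC-specific handler.
 */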
static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	hdev->asic_funcs->reset_sob(hdev, hw_sob);
}

void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
		hw_sob->q_idx, hw_sob->sob_id);
}

static const char *hl_fence_get_driver_name(struct dma_fence *fence)
{
	return "HabanaLabs";
}

static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
{
	struct hl_cs_compl *hl_cs_compl =
		container_of(fence, struct hl_cs_compl, base_fence);

	return dev_name(hl_cs_compl->hdev->dev);
}

static bool hl_fence_enable_signaling(struct dma_fence *fence)
{
	return true;
}

static void hl_fence_release(struct dma_fence *fence)
{
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
	struct hl_device *hdev = hl_cs_cmpl->hdev;

	/* EBUSY means the CS was never submitted and hence we don't have
	 * an attached hw_sob object that we should handle here
	 */
	if (fence->error == -EBUSY)
		goto free;

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
			(hl_cs_cmpl->type == CS_TYPE_WAIT)) {

		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

		/*
		 * A signal CS can get completion while the corresponding wait
		 * for signal CS is on its way to the PQ. The wait for signal CS
		 * will get stuck if the signal CS incremented the SOB to its
		 * max value and there are no pending (submitted) waits on this
		 * SOB.
		 * We do the following to avoid this situation:
		 * 1. The wait for signal CS must get a ref for the signal CS as
		 *    soon as possible in cs_ioctl_signal_wait() and put it
		 *    before being submitted to the PQ but after it incremented
		 *    the SOB refcnt in init_signal_wait_cs().
		 * 2. Signal/Wait for signal CS will decrement the SOB refcnt
		 *    here.
		 * These two measures guarantee that the wait for signal CS will
		 * reset the SOB upon completion rather than the signal CS and
		 * hence the above scenario is avoided.
		 */
		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
	}

free:
	kfree_rcu(hl_cs_cmpl, base_fence.rcu);
}

static const struct dma_fence_ops hl_fence_ops = {
	.get_driver_name = hl_fence_get_driver_name,
	.get_timeline_name = hl_fence_get_timeline_name,
	.enable_signaling = hl_fence_enable_signaling,
	.release = hl_fence_release
};

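/*
 * CS refcounting helpers. The CS is freed via cs_do_release() once the
 * last reference is dropped - references are held by the submitting
 * thread and by every job that runs on an external or H/W queue.
 */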
static void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
					job->is_kernel_allocated_cb &&
					!hdev->mmu_enable));
}

/*
 * cs_parser - parse the user command submission
 *
 * @hpriv	: pointer to the private data of the fd
 * @job		: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;

			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt++;
			spin_unlock(&job->patched_cb->lock);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
}

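/*
 * free_job - release a job's command buffers and detach it from its CS
 *
 * Drops the CB refcounts taken during submission, removes the job from
 * the CS job list and, for external/H/W queue jobs, puts the CS
 * reference that was taken on their behalf.
 */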
static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt--;
			spin_unlock(&job->patched_cb->lock);

			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 */
	if (job->queue_type == QUEUE_TYPE_HW &&
			job->is_kernel_allocated_cb && hdev->mmu_enable) {
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);

		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	if (job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW)
		cs_put(cs);

	kfree(job);
}

static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
{
	hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
			ctx->cs_counters.device_in_reset_drop_cnt;
	hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
			ctx->cs_counters.out_of_mem_drop_cnt;
	hdev->aggregated_cs_counters.parsing_drop_cnt +=
			ctx->cs_counters.parsing_drop_cnt;
	hdev->aggregated_cs_counters.queue_full_drop_cnt +=
			ctx->cs_counters.queue_full_drop_cnt;
}

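/*
 * cs_do_release - final cleanup of a command submission
 *
 * @ref : kref field of the CS that reached a zero refcount
 *
 * Frees the remaining (internal queue) jobs, updates the CI of internal
 * queues, hands the TDR over to the next CS in the mirror list, sets the
 * fence error for timed-out/aborted/unsubmitted CS and signals the fence.
 */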
static void cs_do_release(struct kref *ref)
{
	struct hl_cs *cs = container_of(ref, struct hl_cs,
						refcount);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;

	cs->completed = true;

	/*
	 * Although if we reached here it means that all external jobs have
	 * finished, because each one of them took refcnt to CS, we still
	 * need to go over the internal jobs and free them. Otherwise, we
	 * will have leaked memory and what's worse, the CS object (and
	 * potentially the CTX object) could be released, while the JOB
	 * still holds a pointer to them (but no reference).
	 */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);

	/* We also need to update CI for internal queues */
	if (cs->submitted) {
		hdev->asic_funcs->hw_queues_lock(hdev);

		hdev->cs_active_cnt--;
		if (!hdev->cs_active_cnt) {
			struct hl_device_idle_busy_ts *ts;

			ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
			ts->busy_to_idle_ts = ktime_get();

			if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
				hdev->idle_busy_ts_idx = 0;
		} else if (hdev->cs_active_cnt < 0) {
			dev_crit(hdev->dev, "CS active cnt %d is negative\n",
				hdev->cs_active_cnt);
		}

		hdev->asic_funcs->hw_queues_unlock(hdev);

		hl_int_hw_queue_update_ci(cs);

		spin_lock(&hdev->hw_queues_mirror_lock);
		/* remove CS from hw_queues mirror list */
		list_del_init(&cs->mirror_node);
		spin_unlock(&hdev->hw_queues_mirror_lock);

		/*
		 * Don't cancel TDR in case this CS was timedout because we
		 * might be running from the TDR context
		 */
		if ((!cs->timedout) &&
			(hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
			struct hl_cs *next;

			if (cs->tdr_active)
				cancel_delayed_work_sync(&cs->work_tdr);

			spin_lock(&hdev->hw_queues_mirror_lock);

			/* queue TDR for next CS */
			next = list_first_entry_or_null(
					&hdev->hw_queues_mirror_list,
					struct hl_cs, mirror_node);

			if ((next) && (!next->tdr_active)) {
				next->tdr_active = true;
				schedule_delayed_work(&next->work_tdr,
							hdev->timeout_jiffies);
			}

			spin_unlock(&hdev->hw_queues_mirror_lock);
		}
	} else if (cs->type == CS_TYPE_WAIT) {
		/*
		 * In case the wait for signal CS was submitted, the put occurs
		 * in init_signal_wait_cs() right before hanging on the PQ.
		 */
		dma_fence_put(cs->signal_fence);
	}

	/*
	 * Must be called before hl_ctx_put because inside we use ctx to get
	 * the device
	 */
	hl_debugfs_remove_cs(cs);

	hl_ctx_put(cs->ctx);

	/* We need to mark an error for not submitted because in that case
	 * the dma fence release flow is different. Mainly, we don't need
	 * to handle hw_sob for signal/wait
	 */
	if (cs->timedout)
		dma_fence_set_error(cs->fence, -ETIMEDOUT);
	else if (cs->aborted)
		dma_fence_set_error(cs->fence, -EIO);
	else if (!cs->submitted)
		dma_fence_set_error(cs->fence, -EBUSY);

	dma_fence_signal(cs->fence);
	dma_fence_put(cs->fence);

	cs_counters_aggregate(hdev, cs->ctx);

	kfree(cs->jobs_in_queue_cnt);
	kfree(cs);
}

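/*
 * cs_timedout - TDR (timeout detection/recovery) delayed-work handler
 *
 * Marks the CS as timed-out so its TDR won't be cancelled later and,
 * if the driver is configured to reset on lockup, resets the device.
 */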
static void cs_timedout(struct work_struct *work)
{
	struct hl_device *hdev;
	int rc;
	struct hl_cs *cs = container_of(work, struct hl_cs,
						work_tdr.work);
	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;

	if ((!cs->submitted) || (cs->completed)) {
		cs_put(cs);
		return;
	}

	/* Mark the CS as timed out so we won't try to cancel its TDR */
	cs->timedout = true;

	hdev = cs->ctx->hdev;

	dev_err(hdev->dev,
		"Command submission %llu has not finished in time!\n",
		cs->sequence);

	cs_put(cs);

	if (hdev->reset_on_lockup)
		hl_device_reset(hdev, false, false);
}

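/*
 * allocate_cs - allocate and initialize a CS object and its completion fence
 *
 * Rejects the submission with -EAGAIN if the fence occupying the same
 * slot in the context's cs_pending array is not signaled yet, i.e. there
 * are too many CS in flight.
 */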
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
	struct hl_cs_compl *cs_cmpl;
	struct dma_fence *other = NULL;
	struct hl_cs *cs;
	int rc;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
		return -ENOMEM;

	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl) {
		rc = -ENOMEM;
		goto free_cs;
	}

	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);

	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq &
				(hdev->asic_prop.max_pending_cs - 1)];
	if ((other) && (!dma_fence_is_signaled(other))) {
		dev_dbg(hdev->dev,
			"Rejecting CS because of too many in-flights CS\n");
		rc = -EAGAIN;
		goto free_fence;
	}

	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
	if (!cs->jobs_in_queue_cnt) {
		rc = -ENOMEM;
		goto free_fence;
	}

	dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
			ctx->asid, ctx->cs_sequence);

	cs->sequence = cs_cmpl->cs_seq;

	ctx->cs_pending[cs_cmpl->cs_seq &
			(hdev->asic_prop.max_pending_cs - 1)] =
							&cs_cmpl->base_fence;
	ctx->cs_sequence++;

	dma_fence_get(&cs_cmpl->base_fence);

	dma_fence_put(other);

	spin_unlock(&ctx->cs_lock);

	*cs_new = cs;

	return 0;

free_fence:
	spin_unlock(&ctx->cs_lock);
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
	return rc;
}

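/*
 * cs_rollback - free all the jobs of a CS that is being torn down before
 * completion, e.g. on a failed submission or during device reset.
 */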
static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_job *job, *tmp;

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);
}

void hl_cs_rollback_all(struct hl_device *hdev)
{
	int i;
	struct hl_cs *cs, *tmp;

	/* flush all completions */
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		flush_workqueue(hdev->cq_wq[i]);

	/* Make sure we don't have leftovers in the H/W queues mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
					mirror_node) {
		cs_get(cs);
		cs->aborted = true;
		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
					cs->ctx->asid, cs->sequence);
		cs_rollback(hdev, cs);
		cs_put(cs);
	}
}

static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
						finish_work);
	struct hl_cs *cs = job->cs;
	struct hl_device *hdev = cs->ctx->hdev;

	/* job is no longer needed */
	free_job(hdev, job);
}

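/*
 * validate_queue_index - sanity-check the queue index of a CS chunk
 *
 * Verifies the index is within bounds, refers to a real queue and is not
 * reserved for the driver, and reports the queue type and whether the
 * queue requires a kernel-allocated CB.
 */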
static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;

	/* This must be checked here to prevent out-of-bounds access to
	 * hw_queues_props array
	 */
	if (chunk->queue_index >= asic->max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return -EINVAL;
	}

	*queue_type = hw_queue_prop->type;
	*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;

	return 0;
}

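/*
 * get_cb_from_cs_chunk - look up the CB that a CS chunk refers to
 *
 * Takes a refcount on the CB and increments its cs_cnt so it can't be
 * destroyed while the CS is in flight. Returns NULL on an invalid handle
 * or size.
 */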
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
					struct hl_cs_chunk *chunk)
{
	struct hl_cb *cb;
	u32 cb_handle;

	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
	if (!cb) {
		dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
		return NULL;
	}

	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
		goto release_cb;
	}

	spin_lock(&cb->lock);
	cb->cs_cnt++;
	spin_unlock(&cb->lock);

	return cb;

release_cb:
	hl_cb_put(cb);
	return NULL;
}

struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		return NULL;

	job->queue_type = queue_type;
	job->is_kernel_allocated_cb = is_kernel_allocated_cb;

	if (is_cb_patched(hdev, job))
		INIT_LIST_HEAD(&job->userptr_list);

	if (job->queue_type == QUEUE_TYPE_EXT)
		INIT_WORK(&job->finish_work, job_wq_completion);

	return job;
}

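/*
 * cs_ioctl_default - handle a regular (execute/restore) CS submission
 *
 * Copies the chunk array from user-space, builds a job per chunk, parses
 * the jobs and schedules the CS on the H/W queues. At least one job must
 * target an external or H/W queue, since only those generate a completion.
 */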
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	bool int_queues_only = true;
	u32 size_to_copy;
	int rc, i;

	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, hpriv->ctx);

	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
	if (rc) {
		hl_ctx_put(hpriv->ctx);
		goto free_cs_chunk_array;
	}

	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;

		rc = validate_queue_index(hdev, chunk, &queue_type,
				&is_kernel_allocated_cb);
		if (rc) {
			hpriv->ctx->cs_counters.parsing_drop_cnt++;
			goto free_cs_object;
		}

		if (is_kernel_allocated_cb) {
			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
			if (!cb) {
				hpriv->ctx->cs_counters.parsing_drop_cnt++;
				rc = -EINVAL;
				goto free_cs_object;
			}
		} else {
			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
		}

		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
			int_queues_only = false;

		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (is_kernel_allocated_cb)
				goto release_cb;
			else
				goto free_cs_object;
		}

		job->id = i + 1;
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;

		list_add_tail(&job->cs_node, &cs->job_list);

		/*
		 * Increment CS reference. When CS reference is 0, CS is
		 * done and can be signaled to user and free all its resources
		 * Only increment for JOB on external or H/W queues, because
		 * only for those JOBs we get completion
		 */
		if (job->queue_type == QUEUE_TYPE_EXT ||
				job->queue_type == QUEUE_TYPE_HW)
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);

		rc = cs_parser(hpriv, job);
		if (rc) {
			hpriv->ctx->cs_counters.parsing_drop_cnt++;
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
				cs->ctx->asid, cs->sequence, job->id, rc);
			goto free_cs_object;
		}
	}

	if (int_queues_only) {
		hpriv->ctx->cs_counters.parsing_drop_cnt++;
		dev_err(hdev->dev,
			"Reject CS %d.%llu because only internal queues jobs are present\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
	}

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				cs->ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

release_cb:
	spin_lock(&cb->lock);
	cb->cs_cnt--;
	spin_unlock(&cb->lock);
	hl_cb_put(cb);
free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

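/*
 * cs_ioctl_signal_wait - handle a sync stream (signal or wait) CS
 *
 * Builds a single-job CS whose CB is generated by the driver rather than
 * parsed from user-space. For a wait CS, the fence of the signal CS it
 * depends on is looked up and saved for initialization right before the
 * CS is hanged on the queue.
 */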
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	struct hw_queue_properties *hw_queue_prop;
	struct dma_fence *sig_fence = NULL;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	enum hl_queue_type q_type;
	u64 *signal_seq_arr = NULL, signal_seq;
	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
	int rc;

	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];

	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	q_idx = chunk->queue_index;
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

	if ((q_idx >= hdev->asic_prop.max_queues) ||
			(!hw_queue_prop->supports_sync_stream)) {
		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_WAIT) {
		struct hl_cs_compl *sig_waitcs_cmpl;

		signal_seq_arr_len = chunk->num_signal_seq_arr;

		/* currently only one signal seq is supported */
		if (signal_seq_arr_len != 1) {
			dev_err(hdev->dev,
				"Wait for signal CS supports only one signal CS seq\n");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_ATOMIC);
		if (!signal_seq_arr) {
			rc = -ENOMEM;
			goto free_cs_chunk_array;
		}

		size_to_copy = chunk->num_signal_seq_arr *
				sizeof(*signal_seq_arr);
		if (copy_from_user(signal_seq_arr,
					u64_to_user_ptr(chunk->signal_seq_arr),
					size_to_copy)) {
			dev_err(hdev->dev,
				"Failed to copy signal seq array from user\n");
			rc = -EFAULT;
			goto free_signal_seq_array;
		}

		/* currently it is guaranteed to have only one signal seq */
		signal_seq = signal_seq_arr[0];
		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_signal_seq_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_signal_seq_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal CS\n",
				signal_seq);
			dma_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_signal_seq_array;
		}

		if (dma_fence_is_signaled(sig_fence)) {
			/* signal CS already finished */
			dma_fence_put(sig_fence);
			rc = 0;
			goto free_signal_seq_array;
		}
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, ctx);

	rc = allocate_cs(hdev, ctx, cs_type, &cs);
	if (rc) {
		if (cs_type == CS_TYPE_WAIT)
			dma_fence_put(sig_fence);
		hl_ctx_put(ctx);
		goto free_signal_seq_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 */
	if (cs->type == CS_TYPE_WAIT)
		cs->signal_fence = sig_fence;

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	job = hl_cs_allocate_job(hdev, q_type, true);
	if (!job) {
		ctx->cs_counters.out_of_mem_drop_cnt++;
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto put_cs;
	}

	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	cb = hl_cb_kernel_create(hdev, cb_size,
			q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
	if (!cb) {
		ctx->cs_counters.out_of_mem_drop_cnt++;
		kfree(job);
		rc = -EFAULT;
		goto put_cs;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = q_idx;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need the CB in
	 * the CB idr anymore, and we must decrement its refcount as it was
	 * incremented inside hl_cb_kernel_create().
	 */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	hl_debugfs_add_job(hdev, job);

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_signal_seq_array:
	if (cs_type == CS_TYPE_WAIT)
		kfree(signal_seq_arr);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

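/*
 * hl_cs_ioctl - IOCTL handler for command submission
 *
 * Validates the CS flags, performs the one-time context-switch/restore
 * phase for the context if needed, and then dispatches the execute phase
 * to cs_ioctl_default() or cs_ioctl_signal_wait() according to CS type.
 */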
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	void __user *chunks_execute, *chunks_restore;
	enum hl_cs_type cs_type;
	u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
	u64 cs_seq = ULLONG_MAX;
	int rc, do_ctx_switch;
	bool need_soft_reset = false;

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't submit new CS\n",
			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
		rc = -EBUSY;
		goto out;
	}

	sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;

	if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
		dev_err(hdev->dev,
			"Signal and wait CS flags are mutually exclusive, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
			(!hdev->supports_sync_stream))) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		rc = -EINVAL;
		goto out;
	}

	if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
		cs_type = CS_TYPE_SIGNAL;
	else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
		cs_type = CS_TYPE_WAIT;
	else
		cs_type = CS_TYPE_DEFAULT;

	chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks_execute = args->in.num_chunks_execute;

	if (cs_type == CS_TYPE_DEFAULT) {
		if (!num_chunks_execute) {
			dev_err(hdev->dev,
				"Got execute CS with 0 chunks, context %d\n",
				ctx->asid);
			rc = -EINVAL;
			goto out;
		}
	} else if (num_chunks_execute != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		long ret;

		chunks_restore =
			(void __user *) (uintptr_t) args->in.chunks_restore;
		num_chunks_restore = args->in.num_chunks_restore;

		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
					"Failed to switch to context %d, rejecting CS! %d\n",
					ctx->asid, rc);
				/*
				 * If we timedout, or if the device is not IDLE
				 * while we want to do context-switch (-EBUSY),
				 * we need to soft-reset because QMAN is
				 * probably stuck. However, we can't call to
				 * reset here directly because of deadlock, so
				 * need to do it at the very end of this
				 * function
				 */
				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
					need_soft_reset = true;
				mutex_unlock(&hpriv->restore_phase_mutex);
				goto out;
			}
		}

		hdev->asic_funcs->restore_phase_topology(hdev);

		if (!num_chunks_restore) {
			dev_dbg(hdev->dev,
				"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = cs_ioctl_default(hpriv, chunks_restore,
						num_chunks_restore, &cs_seq);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to submit restore CS for context %d (%d)\n",
				ctx->asid, rc);
			goto out;
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks_restore) {
			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					cs_seq);
			if (ret <= 0) {
				dev_err(hdev->dev,
					"Restore CS for context %d failed to complete %ld\n",
					ctx->asid, ret);
				rc = -ENOEXEC;
				goto out;
			}
		}

		ctx->thread_ctx_switch_wait_token = 1;
	} else if (!ctx->thread_ctx_switch_wait_token) {
		u32 tmp;

		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);

		if (rc == -ETIMEDOUT) {
			dev_err(hdev->dev,
				"context switch phase timeout (%d)\n", tmp);
			goto out;
		}
	}

	if (cs_type == CS_TYPE_DEFAULT)
		rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
					&cs_seq);
	else
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
						num_chunks_execute, &cs_seq);

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));
		args->out.status = rc;
		args->out.seq = cs_seq;
	}

	if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
		hl_device_reset(hdev, false, false);

	return rc;
}

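/*
 * _hl_cs_wait_ioctl - wait for a CS to complete, up to timeout_us
 *
 * Returns a positive value on completion and 0 on timeout (mirroring
 * dma_fence_wait_timeout()), 1 if the fence is already gone because the
 * CS finished long ago, or a negative error code on failure.
 */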
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
	struct dma_fence *fence;
	unsigned long timeout;
	long rc;

	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
		timeout = timeout_us;
	else
		timeout = usecs_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	fence = hl_ctx_get_fence(ctx, seq);
	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on CS %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
	} else if (fence) {
		rc = dma_fence_wait_timeout(fence, true, timeout);
		if (fence->error == -ETIMEDOUT)
			rc = -ETIMEDOUT;
		else if (fence->error == -EIO)
			rc = -EIO;
		dma_fence_put(fence);
	} else {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);
		rc = 1;
	}

	hl_ctx_put(ctx);

	return rc;
}

int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	u64 seq = args->in.seq;
	long rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);

	memset(args, 0, sizeof(*args));

	if (rc < 0) {
		if (rc == -ERESTARTSYS) {
			dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for CS handle %llu\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
			rc = -EINTR;
		} else if (rc == -ETIMEDOUT) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has timed-out while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has been aborted while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (rc == 0)
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	else
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;

	return 0;
}