1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | ||
3 | /* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ | |
4 | /* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ | |
5 | ||
6 | #include <linux/bitfield.h> | |
7 | #include <linux/bits.h> | |
8 | #include <linux/completion.h> | |
9 | #include <linux/delay.h> | |
10 | #include <linux/dma-buf.h> | |
11 | #include <linux/dma-mapping.h> | |
12 | #include <linux/interrupt.h> | |
13 | #include <linux/kref.h> | |
14 | #include <linux/list.h> | |
15 | #include <linux/math64.h> | |
16 | #include <linux/mm.h> | |
17 | #include <linux/moduleparam.h> | |
18 | #include <linux/scatterlist.h> | |
19 | #include <linux/spinlock.h> | |
20 | #include <linux/srcu.h> | |
21 | #include <linux/types.h> | |
22 | #include <linux/uaccess.h> | |
23 | #include <linux/wait.h> | |
24 | #include <drm/drm_file.h> | |
25 | #include <drm/drm_gem.h> | |
26 | #include <drm/drm_prime.h> | |
27 | #include <drm/drm_print.h> | |
28 | #include <uapi/drm/qaic_accel.h> | |
29 | ||
30 | #include "qaic.h" | |
31 | ||
32 | #define SEM_VAL_MASK GENMASK_ULL(11, 0) | |
33 | #define SEM_INDEX_MASK GENMASK_ULL(4, 0) | |
34 | #define BULK_XFER BIT(3) | |
35 | #define GEN_COMPLETION BIT(4) | |
36 | #define INBOUND_XFER 1 | |
37 | #define OUTBOUND_XFER 2 | |
38 | #define REQHP_OFF 0x0 /* we read this */ | |
39 | #define REQTP_OFF 0x4 /* we write this */ | |
40 | #define RSPHP_OFF 0x8 /* we write this */ | |
41 | #define RSPTP_OFF 0xc /* we read this */ | |
42 | ||
43 | #define ENCODE_SEM(val, index, sync, cmd, flags) \ | |
44 | ({ \ | |
45 | FIELD_PREP(GENMASK(11, 0), (val)) | \ | |
46 | FIELD_PREP(GENMASK(20, 16), (index)) | \ | |
47 | FIELD_PREP(BIT(22), (sync)) | \ | |
48 | FIELD_PREP(GENMASK(26, 24), (cmd)) | \ | |
49 | FIELD_PREP(GENMASK(30, 29), (flags)) | \ | |
50 | FIELD_PREP(BIT(31), (cmd) ? 1 : 0); \ | |
51 | }) | |
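A worked illustration of the encoding above (the operand values are arbitrary and chosen only to show the bit layout; they do not come from the driver):

	/* illustrative only: val = 3, index = 2, sync = 1, cmd = 4, flags = 0 */
	ENCODE_SEM(3, 2, 1, 4, 0)
		== (3 << 0) | (2 << 16) | (1 << 22) | (4 << 24) | (0 << 29) | (1 << 31)
		== 0x84420003	/* bit 31 is set because cmd is non-zero */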
52 | #define NUM_EVENTS 128 | |
53 | #define NUM_DELAYS 10 | |
54 | ||
55 | static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */ | |
56 | module_param(wait_exec_default_timeout_ms, uint, 0600); | |
57 | MODULE_PARM_DESC(wait_exec_default_timeout_ms, "Default timeout for DRM_IOCTL_QAIC_WAIT_BO"); | |
58 | ||
59 | static unsigned int datapath_poll_interval_us = 100; /* 100 usec default */ | |
60 | module_param(datapath_poll_interval_us, uint, 0600); | |
61 | MODULE_PARM_DESC(datapath_poll_interval_us, | |
62 | "Amount of time to sleep between activity when datapath polling is enabled"); | |
63 | ||
64 | struct dbc_req { | |
65 | /* | |
66 | * A request ID is assigned to each memory handle going into the DMA queue. | |
67 | * Since a single memory handle can enqueue multiple elements in the DMA | |
68 | * queue, all of them will have the same request ID. | |
69 | */ | |
70 | __le16 req_id; | |
71 | /* Future use */ | |
72 | __u8 seq_id; | |
73 | /* | |
74 | * Special encoded variable | |
75 | * 7    0 - Do not force to generate MSI after DMA is completed | |
76 | *      1 - Force to generate MSI after DMA is completed | |
77 | * 6:5  Reserved | |
78 | * 4    1 - Generate completion element in the response queue | |
79 | *      0 - No Completion Code | |
80 | * 3    0 - DMA request is a linked list transfer | |
81 | *      1 - DMA request is a bulk transfer | |
82 | * 2    Reserved | |
83 | * 1:0  00 - No DMA transfer involved | |
84 | *      01 - DMA transfer is part of inbound transfer | |
85 | *      10 - DMA transfer is part of outbound transfer | |
86 | *      11 - NA | |
87 | */ | |
88 | __u8 cmd; | |
89 | __le32 resv; | |
90 | /* Source address for the transfer */ | |
91 | __le64 src_addr; | |
92 | /* Destination address for the transfer */ | |
93 | __le64 dest_addr; | |
94 | /* Length of transfer request */ | |
95 | __le32 len; | |
96 | __le32 resv2; | |
97 | /* Doorbell address */ | |
98 | __le64 db_addr; | |
99 | /* | |
100 | * Special encoded variable | |
101 | * 7    1 - Doorbell(db) write | |
102 | *      0 - No doorbell write | |
103 | * 6:2  Reserved | |
104 | * 1:0  00 - 32-bit access, db address must be aligned to a 32-bit boundary | |
105 | *      01 - 16-bit access, db address must be aligned to a 16-bit boundary | |
106 | *      10 - 8-bit access, db address must be aligned to an 8-bit boundary | |
107 | *      11 - Reserved | |
108 | */ | |
109 | __u8 db_len; | |
110 | __u8 resv3; | |
111 | __le16 resv4; | |
112 | /* 32 bit data written to doorbell address */ | |
113 | __le32 db_data; | |
114 | /* | |
115 | * Special encoded variable | |
116 | * All the fields of sem_cmdX are passed from user space and are ORed | |
117 | * together to form sem_cmd. | |
118 | * 11:0   Semaphore value | |
119 | * 15:12  Reserved | |
120 | * 20:16  Semaphore index | |
121 | * 21     Reserved | |
122 | * 22     Semaphore sync | |
123 | * 23     Reserved | |
124 | * 26:24  Semaphore command | |
125 | * 28:27  Reserved | |
126 | * 29     Semaphore DMA outbound sync fence | |
127 | * 30     Semaphore DMA inbound sync fence | |
128 | * 31     Enable semaphore command | |
129 | */ | |
130 | __le32 sem_cmd0; | |
131 | __le32 sem_cmd1; | |
132 | __le32 sem_cmd2; | |
133 | __le32 sem_cmd3; | |
134 | } __packed; | |
135 | ||
136 | struct dbc_rsp { | |
137 | /* Request ID of the memory handle whose DMA transaction is completed */ | |
138 | __le16 req_id; | |
139 | /* Status of the DMA transaction. 0: success, otherwise failure */ | |
140 | __le16 status; | |
141 | } __packed; | |
142 | ||
143 | inline int get_dbc_req_elem_size(void) | |
144 | { | |
145 | return sizeof(struct dbc_req); | |
146 | } | |
147 | ||
148 | inline int get_dbc_rsp_elem_size(void) | |
149 | { | |
150 | return sizeof(struct dbc_rsp); | |
151 | } | |
152 | ||
153 | static void free_slice(struct kref *kref) | |
154 | { | |
155 | struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count); | |
156 | ||
157 | list_del(&slice->slice); | |
158 | drm_gem_object_put(&slice->bo->base); | |
159 | sg_free_table(slice->sgt); | |
160 | kfree(slice->sgt); | |
161 | kfree(slice->reqs); | |
162 | kfree(slice); | |
163 | } | |
164 | ||
165 | static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out, | |
166 | struct sg_table *sgt_in, u64 size, u64 offset) | |
167 | { | |
168 | int total_len, len, nents, offf = 0, offl = 0; | |
169 | struct scatterlist *sg, *sgn, *sgf, *sgl; | |
170 | struct sg_table *sgt; | |
171 | int ret, j; | |
172 | ||
173 | /* find out number of relevant nents needed for this mem */ | |
174 | total_len = 0; | |
175 | sgf = NULL; | |
176 | sgl = NULL; | |
177 | nents = 0; | |
178 | ||
179 | size = size ? size : PAGE_SIZE; | |
180 | for (sg = sgt_in->sgl; sg; sg = sg_next(sg)) { | |
181 | len = sg_dma_len(sg); | |
182 | ||
183 | if (!len) | |
184 | continue; | |
185 | if (offset >= total_len && offset < total_len + len) { | |
186 | sgf = sg; | |
187 | offf = offset - total_len; | |
188 | } | |
189 | if (sgf) | |
190 | nents++; | |
191 | if (offset + size >= total_len && | |
192 | offset + size <= total_len + len) { | |
193 | sgl = sg; | |
194 | offl = offset + size - total_len; | |
195 | break; | |
196 | } | |
197 | total_len += len; | |
198 | } | |
199 | ||
200 | if (!sgf || !sgl) { | |
201 | ret = -EINVAL; | |
202 | goto out; | |
203 | } | |
204 | ||
205 | sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); | |
206 | if (!sgt) { | |
207 | ret = -ENOMEM; | |
208 | goto out; | |
209 | } | |
210 | ||
211 | ret = sg_alloc_table(sgt, nents, GFP_KERNEL); | |
212 | if (ret) | |
213 | goto free_sgt; | |
214 | ||
215 | /* copy relevant sg node and fix page and length */ | |
216 | sgn = sgf; | |
217 | for_each_sgtable_sg(sgt, sg, j) { | |
218 | memcpy(sg, sgn, sizeof(*sg)); | |
219 | if (sgn == sgf) { | |
220 | sg_dma_address(sg) += offf; | |
221 | sg_dma_len(sg) -= offf; | |
222 | sg_set_page(sg, sg_page(sgn), sg_dma_len(sg), offf); | |
223 | } else { | |
224 | offf = 0; | |
225 | } | |
226 | if (sgn == sgl) { | |
227 | sg_dma_len(sg) = offl - offf; | |
228 | sg_set_page(sg, sg_page(sgn), offl - offf, offf); | |
229 | sg_mark_end(sg); | |
230 | break; | |
231 | } | |
232 | sgn = sg_next(sgn); | |
233 | } | |
234 | ||
235 | *sgt_out = sgt; | |
236 | return ret; | |
237 | ||
238 | free_sgt: | |
239 | kfree(sgt); | |
240 | out: | |
241 | *sgt_out = NULL; | |
242 | return ret; | |
243 | } | |
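The first loop above is easiest to follow with concrete numbers. Below is a small standalone sketch (plain userspace C, not driver code) that mirrors that search over an array of segment lengths; the segment sizes, offset and size are made-up values.

	#include <stdio.h>

	int main(void)
	{
		unsigned long segs[] = { 4096, 4096, 4096 };  /* stand-ins for sg_dma_len() */
		unsigned long offset = 6144, size = 4096;
		unsigned long total = 0, offf = 0, offl = 0;
		int first = -1, last = -1, nents = 0;

		for (int i = 0; i < 3; i++) {
			/* first segment that contains 'offset' */
			if (first < 0 && offset >= total && offset < total + segs[i]) {
				first = i;
				offf = offset - total;
			}
			if (first >= 0)
				nents++;
			/* segment that contains the end of the range */
			if (offset + size >= total && offset + size <= total + segs[i]) {
				last = i;
				offl = offset + size - total;
				break;
			}
			total += segs[i];
		}

		/* Prints: first=1 offf=2048 last=2 offl=2048 nents=2 */
		printf("first=%d offf=%lu last=%d offl=%lu nents=%d\n",
		       first, offf, last, offl, nents);
		return 0;
	}

In other words, for a 4096-byte slice at offset 6144 into three 4 KiB segments, the clone ends up with two entries: the last 2048 bytes of segment 1 and the first 2048 bytes of segment 2.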
244 | ||
245 | static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice, | |
246 | struct qaic_attach_slice_entry *req) | |
247 | { | |
248 | __le64 db_addr = cpu_to_le64(req->db_addr); | |
249 | __le32 db_data = cpu_to_le32(req->db_data); | |
250 | struct scatterlist *sg; | |
251 | __u8 cmd = BULK_XFER; | |
252 | int presync_sem; | |
253 | u64 dev_addr; | |
254 | __u8 db_len; | |
255 | int i; | |
256 | ||
257 | if (!slice->no_xfer) | |
258 | cmd |= (slice->dir == DMA_TO_DEVICE ? INBOUND_XFER : OUTBOUND_XFER); | |
259 | ||
260 | if (req->db_len && !IS_ALIGNED(req->db_addr, req->db_len / 8)) | |
261 | return -EINVAL; | |
262 | ||
263 | presync_sem = req->sem0.presync + req->sem1.presync + req->sem2.presync + req->sem3.presync; | |
264 | if (presync_sem > 1) | |
265 | return -EINVAL; | |
266 | ||
267 | presync_sem = req->sem0.presync << 0 | req->sem1.presync << 1 | | |
268 | req->sem2.presync << 2 | req->sem3.presync << 3; | |
269 | ||
270 | switch (req->db_len) { | |
271 | case 32: | |
272 | db_len = BIT(7); | |
273 | break; | |
274 | case 16: | |
275 | db_len = BIT(7) | 1; | |
276 | break; | |
277 | case 8: | |
278 | db_len = BIT(7) | 2; | |
279 | break; | |
280 | case 0: | |
281 | db_len = 0; /* doorbell is not active for this command */ | |
282 | break; | |
283 | default: | |
284 | return -EINVAL; /* should never hit this */ | |
285 | } | |
286 | ||
287 | /* | |
288 | * When we end up splitting up a single request (ie a buf slice) into | |
289 | * multiple DMA requests, we have to manage the sync data carefully. | |
290 | * There can only be one presync sem. That needs to be on every xfer | |
291 | * so that the DMA engine doesn't transfer data before the receiver is | |
292 | * ready. We only do the doorbell and postsync sems after the xfer. | |
293 | * To guarantee previous xfers for the request are complete, we use a | |
294 | * fence. | |
295 | */ | |
296 | dev_addr = req->dev_addr; | |
297 | for_each_sgtable_sg(slice->sgt, sg, i) { | |
298 | slice->reqs[i].cmd = cmd; | |
299 | slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ? | |
300 | sg_dma_address(sg) : dev_addr); | |
301 | slice->reqs[i].dest_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ? | |
302 | dev_addr : sg_dma_address(sg)); | |
303 | /* | |
304 | * sg_dma_len(sg) returns the size of a DMA segment. The maximum DMA | |
305 | * segment size is set to UINT_MAX by qaic, so the value returned by | |
306 | * sg_dma_len(sg) can never exceed the u32 range and the cast below | |
307 | * does not truncate it. | |
308 | */ | |
309 | slice->reqs[i].len = cpu_to_le32((u32)sg_dma_len(sg)); | |
310 | switch (presync_sem) { | |
311 | case BIT(0): | |
312 | slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, | |
313 | req->sem0.index, | |
314 | req->sem0.presync, | |
315 | req->sem0.cmd, | |
316 | req->sem0.flags)); | |
317 | break; | |
318 | case BIT(1): | |
319 | slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, | |
320 | req->sem1.index, | |
321 | req->sem1.presync, | |
322 | req->sem1.cmd, | |
323 | req->sem1.flags)); | |
324 | break; | |
325 | case BIT(2): | |
326 | slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, | |
327 | req->sem2.index, | |
328 | req->sem2.presync, | |
329 | req->sem2.cmd, | |
330 | req->sem2.flags)); | |
331 | break; | |
332 | case BIT(3): | |
333 | slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, | |
334 | req->sem3.index, | |
335 | req->sem3.presync, | |
336 | req->sem3.cmd, | |
337 | req->sem3.flags)); | |
338 | break; | |
339 | } | |
340 | dev_addr += sg_dma_len(sg); | |
341 | } | |
342 | /* add post transfer stuff to last segment */ | |
343 | i--; | |
344 | slice->reqs[i].cmd |= GEN_COMPLETION; | |
345 | slice->reqs[i].db_addr = db_addr; | |
346 | slice->reqs[i].db_len = db_len; | |
347 | slice->reqs[i].db_data = db_data; | |
348 | /* | |
349 | * Add a fence if we have more than one request going to the hardware | |
350 | * representing the entirety of the user request, and the user request | |
351 | * has no presync condition. | |
352 | * Fences are expensive, so we try to avoid them. We rely on the | |
353 | * hardware behavior to avoid needing one when there is a presync | |
354 | * condition. When a presync exists, all requests for that same | |
355 | * presync will be queued into a fifo. Thus, since we queue the | |
356 | * post xfer activity only on the last request we queue, the hardware | |
357 | * will ensure that the last queued request is processed last, thus | |
358 | * making sure the post xfer activity happens at the right time without | |
359 | * a fence. | |
360 | */ | |
361 | if (i && !presync_sem) | |
362 | req->sem0.flags |= (slice->dir == DMA_TO_DEVICE ? | |
363 | QAIC_SEM_INSYNCFENCE : QAIC_SEM_OUTSYNCFENCE); | |
364 | slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, req->sem0.index, | |
365 | req->sem0.presync, req->sem0.cmd, | |
366 | req->sem0.flags)); | |
367 | slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, req->sem1.index, | |
368 | req->sem1.presync, req->sem1.cmd, | |
369 | req->sem1.flags)); | |
370 | slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, req->sem2.index, | |
371 | req->sem2.presync, req->sem2.cmd, | |
372 | req->sem2.flags)); | |
373 | slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, req->sem3.index, | |
374 | req->sem3.presync, req->sem3.cmd, | |
375 | req->sem3.flags)); | |
376 | ||
377 | return 0; | |
378 | } | |
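To make the doorbell handling in encode_reqs() concrete, a small hedged example (the address is made up):

	/* illustrative only: req->db_len == 16, req->db_addr == 0x1002 */
	IS_ALIGNED(0x1002, 16 / 8);	/* true: the address is 2-byte aligned */
	db_len = BIT(7) | 1;		/* 0x81: doorbell write, 16-bit access */

A db_len of 0 leaves the doorbell inactive for the request, and the completion bit, doorbell and postsync semaphores are only attached to the final request of the slice.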
379 | ||
380 | static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo, | |
381 | struct qaic_attach_slice_entry *slice_ent) | |
382 | { | |
383 | struct sg_table *sgt = NULL; | |
384 | struct bo_slice *slice; | |
385 | int ret; | |
386 | ||
387 | ret = clone_range_of_sgt_for_slice(qdev, &sgt, bo->sgt, slice_ent->size, slice_ent->offset); | |
388 | if (ret) | |
389 | goto out; | |
390 | ||
391 | slice = kmalloc(sizeof(*slice), GFP_KERNEL); | |
392 | if (!slice) { | |
393 | ret = -ENOMEM; | |
394 | goto free_sgt; | |
395 | } | |
396 | ||
397 | slice->reqs = kcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL); | |
398 | if (!slice->reqs) { | |
399 | ret = -ENOMEM; | |
400 | goto free_slice; | |
401 | } | |
402 | ||
403 | slice->no_xfer = !slice_ent->size; | |
404 | slice->sgt = sgt; | |
405 | slice->nents = sgt->nents; | |
406 | slice->dir = bo->dir; | |
407 | slice->bo = bo; | |
408 | slice->size = slice_ent->size; | |
409 | slice->offset = slice_ent->offset; | |
410 | ||
411 | ret = encode_reqs(qdev, slice, slice_ent); | |
412 | if (ret) | |
413 | goto free_req; | |
414 | ||
415 | bo->total_slice_nents += sgt->nents; | |
416 | kref_init(&slice->ref_count); | |
417 | drm_gem_object_get(&bo->base); | |
418 | list_add_tail(&slice->slice, &bo->slices); | |
419 | ||
420 | return 0; | |
421 | ||
422 | free_req: | |
423 | kfree(slice->reqs); | |
424 | free_slice: | |
425 | kfree(slice); | |
426 | free_sgt: | |
427 | sg_free_table(sgt); | |
428 | kfree(sgt); | |
429 | out: | |
430 | return ret; | |
431 | } | |
432 | ||
433 | static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 size) | |
434 | { | |
435 | struct scatterlist *sg; | |
436 | struct sg_table *sgt; | |
437 | struct page **pages; | |
438 | int *pages_order; | |
439 | int buf_extra; | |
440 | int max_order; | |
441 | int nr_pages; | |
442 | int ret = 0; | |
443 | int i, j, k; | |
444 | int order; | |
445 | ||
446 | if (size) { | |
447 | nr_pages = DIV_ROUND_UP(size, PAGE_SIZE); | |
448 | /* | |
449 | * calculate how much extra we are going to allocate, to remove | |
450 | * later | |
451 | */ | |
452 | buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE; | |
453 | max_order = min(MAX_ORDER - 1, get_order(size)); | |
454 | } else { | |
455 | /* allocate a single page for bookkeeping */ | |
456 | nr_pages = 1; | |
457 | buf_extra = 0; | |
458 | max_order = 0; | |
459 | } | |
460 | ||
461 | pages = kvmalloc_array(nr_pages, sizeof(*pages) + sizeof(*pages_order), GFP_KERNEL); | |
462 | if (!pages) { | |
463 | ret = -ENOMEM; | |
464 | goto out; | |
465 | } | |
466 | pages_order = (void *)pages + sizeof(*pages) * nr_pages; | |
467 | ||
468 | /* | |
469 | * Allocate requested memory using alloc_pages. It is possible to allocate | |
470 | * the requested memory in multiple chunks by calling alloc_pages | |
471 | * multiple times. Use SG table to handle multiple allocated pages. | |
472 | */ | |
473 | i = 0; | |
474 | while (nr_pages > 0) { | |
475 | order = min(get_order(nr_pages * PAGE_SIZE), max_order); | |
476 | while (1) { | |
477 | pages[i] = alloc_pages(GFP_KERNEL | GFP_HIGHUSER | | |
478 | __GFP_NOWARN | __GFP_ZERO | | |
479 | (order ? __GFP_NORETRY : __GFP_RETRY_MAYFAIL), | |
480 | order); | |
481 | if (pages[i]) | |
482 | break; | |
483 | if (!order--) { | |
484 | ret = -ENOMEM; | |
485 | goto free_partial_alloc; | |
486 | } | |
487 | } | |
488 | ||
489 | max_order = order; | |
490 | pages_order[i] = order; | |
491 | ||
492 | nr_pages -= 1 << order; | |
493 | if (nr_pages <= 0) | |
494 | /* account for over allocation */ | |
495 | buf_extra += abs(nr_pages) * PAGE_SIZE; | |
496 | i++; | |
497 | } | |
498 | ||
499 | sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); | |
500 | if (!sgt) { | |
501 | ret = -ENOMEM; | |
502 | goto free_partial_alloc; | |
503 | } | |
504 | ||
505 | if (sg_alloc_table(sgt, i, GFP_KERNEL)) { | |
506 | ret = -ENOMEM; | |
507 | goto free_sgt; | |
508 | } | |
509 | ||
510 | /* Populate the SG table with the allocated memory pages */ | |
511 | sg = sgt->sgl; | |
512 | for (k = 0; k < i; k++, sg = sg_next(sg)) { | |
513 | /* Last entry requires special handling */ | |
514 | if (k < i - 1) { | |
515 | sg_set_page(sg, pages[k], PAGE_SIZE << pages_order[k], 0); | |
516 | } else { | |
517 | sg_set_page(sg, pages[k], (PAGE_SIZE << pages_order[k]) - buf_extra, 0); | |
518 | sg_mark_end(sg); | |
519 | } | |
520 | } | |
521 | ||
522 | kvfree(pages); | |
523 | *sgt_out = sgt; | |
524 | return ret; | |
525 | ||
526 | free_sgt: | |
527 | kfree(sgt); | |
528 | free_partial_alloc: | |
529 | for (j = 0; j < i; j++) | |
530 | __free_pages(pages[j], pages_order[j]); | |
531 | kvfree(pages); | |
532 | out: | |
533 | *sgt_out = NULL; | |
534 | return ret; | |
535 | } | |
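A quick numeric walk-through of the sizing above, assuming PAGE_SIZE is 4096 and a made-up request of size = 10000 bytes: nr_pages = 3 and buf_extra = 4096 - (10000 % 4096) = 2288. get_order(10000) is 2, so the loop first attempts an order-2 (four-page) allocation; if it succeeds, nr_pages drops to -1 and buf_extra grows by one extra page to 6384, so the final scatterlist entry is trimmed to 16384 - 6384 = 10000 bytes, exactly the requested size. If the order-2 allocation fails, the order is stepped down and the buffer is stitched together from smaller chunks instead.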
536 | ||
537 | static bool invalid_sem(struct qaic_sem *sem) | |
538 | { | |
539 | if (sem->val & ~SEM_VAL_MASK || sem->index & ~SEM_INDEX_MASK || | |
540 | !(sem->presync == 0 || sem->presync == 1) || sem->pad || | |
541 | sem->flags & ~(QAIC_SEM_INSYNCFENCE | QAIC_SEM_OUTSYNCFENCE) || | |
542 | sem->cmd > QAIC_SEM_WAIT_GT_0) | |
543 | return true; | |
544 | return false; | |
545 | } | |
546 | ||
547 | static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent, | |
548 | u32 count, u64 total_size) | |
549 | { | |
550 | int i; | |
551 | ||
552 | for (i = 0; i < count; i++) { | |
553 | if (!(slice_ent[i].db_len == 32 || slice_ent[i].db_len == 16 || | |
554 | slice_ent[i].db_len == 8 || slice_ent[i].db_len == 0) || | |
555 | invalid_sem(&slice_ent[i].sem0) || invalid_sem(&slice_ent[i].sem1) || | |
556 | invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3)) | |
557 | return -EINVAL; | |
558 | ||
559 | if (slice_ent[i].offset + slice_ent[i].size > total_size) | |
560 | return -EINVAL; | |
561 | } | |
562 | ||
563 | return 0; | |
564 | } | |
565 | ||
566 | static void qaic_free_sgt(struct sg_table *sgt) | |
567 | { | |
568 | struct scatterlist *sg; | |
569 | ||
570 | for (sg = sgt->sgl; sg; sg = sg_next(sg)) | |
571 | if (sg_page(sg)) | |
572 | __free_pages(sg_page(sg), get_order(sg->length)); | |
573 | sg_free_table(sgt); | |
574 | kfree(sgt); | |
575 | } | |
576 | ||
577 | static void qaic_gem_print_info(struct drm_printer *p, unsigned int indent, | |
578 | const struct drm_gem_object *obj) | |
579 | { | |
580 | struct qaic_bo *bo = to_qaic_bo(obj); | |
581 | ||
582 | drm_printf_indent(p, indent, "user requested size=%llu\n", bo->size); | |
583 | } | |
584 | ||
585 | static const struct vm_operations_struct drm_vm_ops = { | |
586 | .open = drm_gem_vm_open, | |
587 | .close = drm_gem_vm_close, | |
588 | }; | |
589 | ||
590 | static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) | |
591 | { | |
592 | struct qaic_bo *bo = to_qaic_bo(obj); | |
593 | unsigned long offset = 0; | |
594 | struct scatterlist *sg; | |
595 | int ret = 0; | |
596 | ||
597 | if (obj->import_attach) | |
598 | return -EINVAL; | |
599 | ||
600 | for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) { | |
601 | if (sg_page(sg)) { | |
602 | ret = remap_pfn_range(vma, vma->vm_start + offset, page_to_pfn(sg_page(sg)), | |
603 | sg->length, vma->vm_page_prot); | |
604 | if (ret) | |
605 | goto out; | |
606 | offset += sg->length; | |
607 | } | |
608 | } | |
609 | ||
610 | out: | |
611 | return ret; | |
612 | } | |
613 | ||
614 | static void qaic_free_object(struct drm_gem_object *obj) | |
615 | { | |
616 | struct qaic_bo *bo = to_qaic_bo(obj); | |
617 | ||
618 | if (obj->import_attach) { | |
619 | /* DMABUF/PRIME Path */ | |
620 | drm_prime_gem_destroy(obj, NULL); | |
621 | } else { | |
622 | /* Private buffer allocation path */ | |
623 | qaic_free_sgt(bo->sgt); | |
624 | } | |
625 | ||
626 | drm_gem_object_release(obj); | |
627 | kfree(bo); | |
628 | } | |
629 | ||
630 | static const struct drm_gem_object_funcs qaic_gem_funcs = { | |
631 | .free = qaic_free_object, | |
632 | .print_info = qaic_gem_print_info, | |
633 | .mmap = qaic_gem_object_mmap, | |
634 | .vm_ops = &drm_vm_ops, | |
635 | }; | |
636 | ||
637 | static struct qaic_bo *qaic_alloc_init_bo(void) | |
638 | { | |
639 | struct qaic_bo *bo; | |
640 | ||
641 | bo = kzalloc(sizeof(*bo), GFP_KERNEL); | |
642 | if (!bo) | |
643 | return ERR_PTR(-ENOMEM); | |
644 | ||
645 | INIT_LIST_HEAD(&bo->slices); | |
646 | init_completion(&bo->xfer_done); | |
647 | complete_all(&bo->xfer_done); | |
648 | ||
649 | return bo; | |
650 | } | |
651 | ||
652 | int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) | |
653 | { | |
654 | struct qaic_create_bo *args = data; | |
655 | int usr_rcu_id, qdev_rcu_id; | |
656 | struct drm_gem_object *obj; | |
657 | struct qaic_device *qdev; | |
658 | struct qaic_user *usr; | |
659 | struct qaic_bo *bo; | |
660 | size_t size; | |
661 | int ret; | |
662 | ||
663 | if (args->pad) | |
664 | return -EINVAL; | |
665 | ||
666 | size = PAGE_ALIGN(args->size); |
667 | if (size == 0) | |
668 | return -EINVAL; | |
669 | ||
670 | usr = file_priv->driver_priv; |
671 | usr_rcu_id = srcu_read_lock(&usr->qddev_lock); | |
672 | if (!usr->qddev) { | |
673 | ret = -ENODEV; | |
674 | goto unlock_usr_srcu; | |
675 | } | |
676 | ||
677 | qdev = usr->qddev->qdev; | |
678 | qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); | |
679 | if (qdev->in_reset) { | |
680 | ret = -ENODEV; | |
681 | goto unlock_dev_srcu; | |
682 | } | |
683 | ||
684 | bo = qaic_alloc_init_bo(); |
685 | if (IS_ERR(bo)) { | |
686 | ret = PTR_ERR(bo); | |
687 | goto unlock_dev_srcu; | |
688 | } | |
689 | obj = &bo->base; | |
690 | ||
691 | drm_gem_private_object_init(dev, obj, size); | |
692 | ||
693 | obj->funcs = &qaic_gem_funcs; | |
694 | ret = create_sgt(qdev, &bo->sgt, size); | |
695 | if (ret) | |
696 | goto free_bo; | |
697 | ||
698 | bo->size = args->size; | |
699 | ||
700 | ret = drm_gem_handle_create(file_priv, obj, &args->handle); | |
701 | if (ret) | |
702 | goto free_sgt; | |
703 | ||
704 | bo->handle = args->handle; | |
705 | drm_gem_object_put(obj); | |
706 | srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); | |
707 | srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); | |
708 | ||
709 | return 0; | |
710 | ||
711 | free_sgt: | |
712 | qaic_free_sgt(bo->sgt); | |
713 | free_bo: | |
714 | kfree(bo); | |
715 | unlock_dev_srcu: | |
716 | srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); | |
717 | unlock_usr_srcu: | |
718 | srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); | |
719 | return ret; | |
720 | } | |
721 | ||
722 | int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) | |
723 | { | |
724 | struct qaic_mmap_bo *args = data; | |
725 | int usr_rcu_id, qdev_rcu_id; | |
726 | struct drm_gem_object *obj; | |
727 | struct qaic_device *qdev; | |
728 | struct qaic_user *usr; | |
729 | int ret; | |
730 | ||
731 | usr = file_priv->driver_priv; | |
732 | usr_rcu_id = srcu_read_lock(&usr->qddev_lock); | |
733 | if (!usr->qddev) { | |
734 | ret = -ENODEV; | |
735 | goto unlock_usr_srcu; | |
736 | } | |
737 | ||
738 | qdev = usr->qddev->qdev; | |
739 | qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); | |
740 | if (qdev->in_reset) { | |
741 | ret = -ENODEV; | |
742 | goto unlock_dev_srcu; | |
743 | } | |
744 | ||
745 | obj = drm_gem_object_lookup(file_priv, args->handle); | |
746 | if (!obj) { | |
747 | ret = -ENOENT; | |
748 | goto unlock_dev_srcu; | |
749 | } | |
750 | ||
751 | ret = drm_gem_create_mmap_offset(obj); | |
752 | if (ret == 0) | |
753 | args->offset = drm_vma_node_offset_addr(&obj->vma_node); | |
754 | ||
755 | drm_gem_object_put(obj); | |
756 | ||
757 | unlock_dev_srcu: | |
758 | srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); | |
759 | unlock_usr_srcu: | |
760 | srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); | |
761 | return ret; | |
762 | } | |
763 | ||
764 | struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) | |
765 | { | |
766 | struct dma_buf_attachment *attach; | |
767 | struct drm_gem_object *obj; | |
768 | struct qaic_bo *bo; | |
769 | size_t size; | |
770 | int ret; | |
771 | ||
772 | bo = qaic_alloc_init_bo(); | |
773 | if (IS_ERR(bo)) { | |
774 | ret = PTR_ERR(bo); | |
775 | goto out; | |
776 | } | |
777 | ||
778 | obj = &bo->base; | |
779 | get_dma_buf(dma_buf); | |
780 | ||
781 | attach = dma_buf_attach(dma_buf, dev->dev); | |
782 | if (IS_ERR(attach)) { | |
783 | ret = PTR_ERR(attach); | |
784 | goto attach_fail; | |
785 | } | |
786 | ||
787 | size = PAGE_ALIGN(attach->dmabuf->size); | |
788 | if (size == 0) { | |
789 | ret = -EINVAL; | |
790 | goto size_align_fail; | |
791 | } | |
792 | ||
793 | drm_gem_private_object_init(dev, obj, size); | |
794 | /* | |
795 | * skipping dma_buf_map_attachment() as we do not know the direction | |
796 | * just yet. Once the direction is known in the subsequent IOCTL to | |
797 | * attach slicing, we can do it then. | |
798 | */ | |
799 | ||
800 | obj->funcs = &qaic_gem_funcs; | |
801 | obj->import_attach = attach; | |
802 | obj->resv = dma_buf->resv; | |
803 | ||
804 | return obj; | |
805 | ||
806 | size_align_fail: | |
807 | dma_buf_detach(dma_buf, attach); | |
808 | attach_fail: | |
809 | dma_buf_put(dma_buf); | |
810 | kfree(bo); | |
811 | out: | |
812 | return ERR_PTR(ret); | |
813 | } | |
814 | ||
815 | static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_hdr *hdr) | |
816 | { | |
817 | struct drm_gem_object *obj = &bo->base; | |
818 | struct sg_table *sgt; | |
819 | int ret; | |
820 | ||
821 | if (obj->import_attach->dmabuf->size < hdr->size) | |
822 | return -EINVAL; | |
823 | ||
824 | sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir); | |
825 | if (IS_ERR(sgt)) { | |
826 | ret = PTR_ERR(sgt); | |
827 | return ret; | |
828 | } | |
829 | ||
830 | bo->sgt = sgt; | |
831 | bo->size = hdr->size; | |
832 | ||
833 | return 0; | |
834 | } | |
835 | ||
836 | static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo, | |
837 | struct qaic_attach_slice_hdr *hdr) | |
838 | { | |
839 | int ret; | |
840 | ||
841 | if (bo->size != hdr->size) | |
842 | return -EINVAL; | |
843 | ||
844 | ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0); | |
845 | if (ret) | |
846 | return -EFAULT; | |
847 | ||
848 | return 0; | |
849 | } | |
850 | ||
851 | static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo, | |
852 | struct qaic_attach_slice_hdr *hdr) | |
853 | { | |
854 | int ret; | |
855 | ||
856 | if (bo->base.import_attach) | |
857 | ret = qaic_prepare_import_bo(bo, hdr); | |
858 | else | |
859 | ret = qaic_prepare_export_bo(qdev, bo, hdr); | |
860 | ||
861 | if (ret == 0) | |
862 | bo->dir = hdr->dir; | |
863 | ||
864 | return ret; | |
865 | } | |
866 | ||
867 | static void qaic_unprepare_import_bo(struct qaic_bo *bo) | |
868 | { | |
869 | dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir); | |
870 | bo->sgt = NULL; | |
871 | bo->size = 0; | |
872 | } | |
873 | ||
874 | static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo) | |
875 | { | |
876 | dma_unmap_sgtable(&qdev->pdev->dev, bo->sgt, bo->dir, 0); | |
877 | } | |
878 | ||
879 | static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo) | |
880 | { | |
881 | if (bo->base.import_attach) | |
882 | qaic_unprepare_import_bo(bo); | |
883 | else | |
884 | qaic_unprepare_export_bo(qdev, bo); | |
885 | ||
886 | bo->dir = 0; | |
887 | } | |
888 | ||
889 | static void qaic_free_slices_bo(struct qaic_bo *bo) | |
890 | { | |
891 | struct bo_slice *slice, *temp; | |
892 | ||
893 | list_for_each_entry_safe(slice, temp, &bo->slices, slice) | |
894 | kref_put(&slice->ref_count, free_slice); | |
895 | } | |
896 | ||
897 | static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo, | |
898 | struct qaic_attach_slice_hdr *hdr, | |
899 | struct qaic_attach_slice_entry *slice_ent) | |
900 | { | |
901 | int ret, i; | |
902 | ||
903 | for (i = 0; i < hdr->count; i++) { | |
904 | ret = qaic_map_one_slice(qdev, bo, &slice_ent[i]); | |
905 | if (ret) { | |
906 | qaic_free_slices_bo(bo); | |
907 | return ret; | |
908 | } | |
909 | } | |
910 | ||
911 | if (bo->total_slice_nents > qdev->dbc[hdr->dbc_id].nelem) { | |
912 | qaic_free_slices_bo(bo); | |
913 | return -ENOSPC; | |
914 | } | |
915 | ||
916 | bo->sliced = true; | |
917 | bo->nr_slice = hdr->count; | |
918 | list_add_tail(&bo->bo_list, &qdev->dbc[hdr->dbc_id].bo_lists); | |
919 | ||
920 | return 0; | |
921 | } | |
922 | ||
923 | int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) | |
924 | { | |
925 | struct qaic_attach_slice_entry *slice_ent; | |
926 | struct qaic_attach_slice *args = data; | |
927 | int rcu_id, usr_rcu_id, qdev_rcu_id; | |
928 | struct dma_bridge_chan *dbc; | |
929 | struct drm_gem_object *obj; |
930 | struct qaic_device *qdev; | |
931 | unsigned long arg_size; | |
932 | struct qaic_user *usr; | |
933 | u8 __user *user_data; | |
934 | struct qaic_bo *bo; | |
935 | int ret; | |
936 | ||
937 | if (args->hdr.count == 0) |
938 | return -EINVAL; | |
939 | ||
940 | arg_size = args->hdr.count * sizeof(*slice_ent); | |
941 | if (arg_size / args->hdr.count != sizeof(*slice_ent)) | |
942 | return -EINVAL; | |
943 | ||
944 | if (args->hdr.size == 0) | |
945 | return -EINVAL; | |
946 | ||
947 | if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE)) | |
948 | return -EINVAL; | |
949 | ||
950 | if (args->data == 0) | |
951 | return -EINVAL; | |
952 | ||
953 | usr = file_priv->driver_priv; |
954 | usr_rcu_id = srcu_read_lock(&usr->qddev_lock); | |
955 | if (!usr->qddev) { | |
956 | ret = -ENODEV; | |
957 | goto unlock_usr_srcu; | |
958 | } | |
959 | ||
960 | qdev = usr->qddev->qdev; | |
961 | qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); | |
962 | if (qdev->in_reset) { | |
963 | ret = -ENODEV; | |
964 | goto unlock_dev_srcu; | |
965 | } | |
966 | ||
967 | if (args->hdr.dbc_id >= qdev->num_dbc) { |
968 | ret = -EINVAL; | |
969 | goto unlock_dev_srcu; | |
970 | } | |
971 | ||
972 | user_data = u64_to_user_ptr(args->data); |
973 | ||
974 | slice_ent = kzalloc(arg_size, GFP_KERNEL); | |
975 | if (!slice_ent) { | |
976 | ret = -ENOMEM; | |
977 | goto unlock_dev_srcu; | |
978 | } | |
979 | ||
980 | ret = copy_from_user(slice_ent, user_data, arg_size); | |
981 | if (ret) { | |
982 | ret = -EFAULT; | |
983 | goto free_slice_ent; | |
984 | } | |
985 | ||
986 | ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, args->hdr.size); | |
987 | if (ret) | |
988 | goto free_slice_ent; | |
989 | ||
990 | obj = drm_gem_object_lookup(file_priv, args->hdr.handle); | |
991 | if (!obj) { | |
992 | ret = -ENOENT; | |
993 | goto free_slice_ent; | |
994 | } | |
995 | ||
996 | bo = to_qaic_bo(obj); | |
997 | ||
998 | if (bo->sliced) { |
999 | ret = -EINVAL; | |
1000 | goto put_bo; | |
1001 | } | |
1002 | ||
1003 | dbc = &qdev->dbc[args->hdr.dbc_id]; |
1004 | rcu_id = srcu_read_lock(&dbc->ch_lock); | |
1005 | if (dbc->usr != usr) { | |
1006 | ret = -EINVAL; | |
1007 | goto unlock_ch_srcu; | |
1008 | } | |
1009 | ||
1010 | ret = qaic_prepare_bo(qdev, bo, &args->hdr); |
1011 | if (ret) | |
1012 | goto unlock_ch_srcu; | |
1013 | |
1014 | ret = qaic_attach_slicing_bo(qdev, bo, &args->hdr, slice_ent); | |
1015 | if (ret) | |
1016 | goto unprepare_bo; | |
1017 | ||
1018 | if (args->hdr.dir == DMA_TO_DEVICE) | |
1019 | dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir); | |
1020 | ||
1021 | bo->dbc = dbc; | |
1022 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1023 | drm_gem_object_put(obj); | |
1024 | kfree(slice_ent); | |
1025 | srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); |
1026 | srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); | |
1027 | ||
1028 | return 0; | |
1029 | ||
1030 | unprepare_bo: | |
1031 | qaic_unprepare_bo(qdev, bo); | |
1032 | unlock_ch_srcu: |
1033 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1034 | put_bo: |
1035 | drm_gem_object_put(obj); | |
1036 | free_slice_ent: | |
1037 | kfree(slice_ent); | |
1038 | unlock_dev_srcu: | |
1039 | srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); | |
1040 | unlock_usr_srcu: | |
1041 | srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); | |
1042 | return ret; | |
1043 | } | |
1044 | ||
1045 | static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id, | |
1046 | u32 head, u32 *ptail) | |
1047 | { | |
1048 | struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id]; | |
1049 | struct dbc_req *reqs = slice->reqs; | |
1050 | u32 tail = *ptail; | |
1051 | u32 avail; | |
1052 | ||
1053 | avail = head - tail; | |
1054 | if (head <= tail) | |
1055 | avail += dbc->nelem; | |
1056 | ||
1057 | --avail; | |
1058 | ||
1059 | if (avail < slice->nents) | |
1060 | return -EAGAIN; | |
1061 | ||
1062 | if (tail + slice->nents > dbc->nelem) { | |
1063 | avail = dbc->nelem - tail; | |
1064 | avail = min_t(u32, avail, slice->nents); | |
1065 | memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs, | |
1066 | sizeof(*reqs) * avail); | |
1067 | reqs += avail; | |
1068 | avail = slice->nents - avail; | |
1069 | if (avail) | |
1070 | memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail); | |
1071 | } else { | |
1072 | memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs, | |
1073 | sizeof(*reqs) * slice->nents); | |
1074 | } | |
1075 | ||
1076 | *ptail = (tail + slice->nents) % dbc->nelem; | |
1077 | ||
1078 | return 0; | |
1079 | } | |
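The availability computation above is standard ring-buffer arithmetic with one slot intentionally kept unused so that head == tail always means the queue is empty. With made-up numbers, dbc->nelem = 10, head = 3 and tail = 7: avail = 3 - 7 + 10 - 1 = 5, so a slice of up to five requests fits. Queueing a four-request slice from tail = 7 wraps: three elements are copied to indices 7-9, the remaining one to index 0, and *ptail becomes (7 + 4) % 10 = 1.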
1080 | ||
1081 | /* | |
1082 | * Based on the value of resize we may only need to transmit first_n | |
1083 | * entries and the last entry, with last_bytes to send from the last entry. | |
1084 | * Note that first_n could be 0. | |
1085 | */ | |
1086 | static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, | |
1087 | u64 resize, u32 dbc_id, u32 head, u32 *ptail) | |
1088 | { | |
1089 | struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id]; | |
1090 | struct dbc_req *reqs = slice->reqs; | |
1091 | struct dbc_req *last_req; | |
1092 | u32 tail = *ptail; | |
1093 | u64 total_bytes; | |
1094 | u64 last_bytes; | |
1095 | u32 first_n; | |
1096 | u32 avail; | |
1097 | int ret; | |
1098 | int i; | |
1099 | ||
1100 | avail = head - tail; | |
1101 | if (head <= tail) | |
1102 | avail += dbc->nelem; | |
1103 | ||
1104 | --avail; | |
1105 | ||
1106 | total_bytes = 0; | |
1107 | for (i = 0; i < slice->nents; i++) { | |
1108 | total_bytes += le32_to_cpu(reqs[i].len); | |
1109 | if (total_bytes >= resize) | |
1110 | break; | |
1111 | } | |
1112 | ||
1113 | if (total_bytes < resize) { | |
1114 | /* User space should have used the full buffer path. */ | |
1115 | ret = -EINVAL; | |
1116 | return ret; | |
1117 | } | |
1118 | ||
1119 | first_n = i; | |
1120 | last_bytes = i ? resize + le32_to_cpu(reqs[i].len) - total_bytes : resize; | |
1121 | ||
1122 | if (avail < (first_n + 1)) | |
1123 | return -EAGAIN; | |
1124 | ||
1125 | if (first_n) { | |
1126 | if (tail + first_n > dbc->nelem) { | |
1127 | avail = dbc->nelem - tail; | |
1128 | avail = min_t(u32, avail, first_n); | |
1129 | memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs, | |
1130 | sizeof(*reqs) * avail); | |
1131 | last_req = reqs + avail; | |
1132 | avail = first_n - avail; | |
1133 | if (avail) | |
1134 | memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail); | |
1135 | } else { | |
1136 | memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs, | |
1137 | sizeof(*reqs) * first_n); | |
1138 | } | |
1139 | } | |
1140 | ||
1141 | /* Copy over the last entry. Here we need to adjust len to the leftover | |
1142 | * size, and take the src and dst addresses from the partially transferred entry. | |
1143 | */ | |
1144 | last_req = dbc->req_q_base + (tail + first_n) % dbc->nelem * get_dbc_req_elem_size(); | |
1145 | memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs)); | |
1146 | ||
1147 | /* | |
1148 | * last_bytes holds the size of a DMA segment. The maximum DMA segment | |
1149 | * size is set to UINT_MAX by qaic, so last_bytes can never exceed the | |
1150 | * u32 range and the cast below does not truncate it. | |
1151 | */ | |
1152 | last_req->len = cpu_to_le32((u32)last_bytes); | |
1153 | last_req->src_addr = reqs[first_n].src_addr; | |
1154 | last_req->dest_addr = reqs[first_n].dest_addr; | |
1155 | ||
1156 | *ptail = (tail + first_n + 1) % dbc->nelem; | |
1157 | ||
1158 | return 0; | |
1159 | } | |
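As an illustration with made-up numbers: a slice of three requests of 4096 bytes each and resize = 6000. The loop accumulates 4096, then 8192 >= 6000 and stops at i = 1, so first_n = 1 and last_bytes = 6000 + 4096 - 8192 = 1904. Request 0 is copied unchanged, and a single extra element is written that carries the completion/doorbell/semaphore fields of the slice's final request but the source and destination addresses of request 1 trimmed to 1904 bytes, consuming first_n + 1 = 2 queue slots in total.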
1160 | ||
1161 | static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *file_priv, | |
1162 | struct qaic_execute_entry *exec, unsigned int count, | |
1163 | bool is_partial, struct dma_bridge_chan *dbc, u32 head, | |
1164 | u32 *tail) | |
1165 | { | |
1166 | struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec; | |
1167 | struct drm_gem_object *obj; | |
1168 | struct bo_slice *slice; | |
1169 | unsigned long flags; | |
1170 | struct qaic_bo *bo; | |
1171 | bool queued; | |
1172 | int i, j; | |
1173 | int ret; | |
1174 | ||
1175 | for (i = 0; i < count; i++) { | |
1176 | /* | |
1177 | * The ref count will be decremented when the transfer of this | |
1178 | * buffer completes, inside dbc_irq_threaded_fn(). | |
1179 | */ | |
1180 | obj = drm_gem_object_lookup(file_priv, | |
1181 | is_partial ? pexec[i].handle : exec[i].handle); | |
1182 | if (!obj) { | |
1183 | ret = -ENOENT; | |
1184 | goto failed_to_send_bo; | |
1185 | } | |
1186 | ||
1187 | bo = to_qaic_bo(obj); | |
1188 | ||
1189 | if (!bo->sliced) { | |
1190 | ret = -EINVAL; | |
1191 | goto failed_to_send_bo; | |
1192 | } | |
1193 | ||
1194 | if (is_partial && pexec[i].resize > bo->size) { | |
1195 | ret = -EINVAL; | |
1196 | goto failed_to_send_bo; | |
1197 | } | |
1198 | ||
1199 | spin_lock_irqsave(&dbc->xfer_lock, flags); | |
1200 | queued = bo->queued; | |
1201 | bo->queued = true; | |
1202 | if (queued) { | |
1203 | spin_unlock_irqrestore(&dbc->xfer_lock, flags); | |
1204 | ret = -EINVAL; | |
1205 | goto failed_to_send_bo; | |
1206 | } | |
1207 | ||
1208 | bo->req_id = dbc->next_req_id++; | |
1209 | ||
1210 | list_for_each_entry(slice, &bo->slices, slice) { | |
1211 | /* | |
1212 | * If this slice does not fall under the given | |
1213 | * resize then skip this slice and continue the loop | |
1214 | */ | |
1215 | if (is_partial && pexec[i].resize && pexec[i].resize <= slice->offset) | |
1216 | continue; | |
1217 | ||
1218 | for (j = 0; j < slice->nents; j++) | |
1219 | slice->reqs[j].req_id = cpu_to_le16(bo->req_id); | |
1220 | ||
1221 | /* | |
1222 | * If this is a partial execute ioctl call, check whether resize | |
1223 | * has cut this slice short; if so, do a partial copy, otherwise | |
1224 | * copy the complete slice. | |
1225 | */ | |
1226 | if (is_partial && pexec[i].resize && | |
1227 | pexec[i].resize < slice->offset + slice->size) | |
1228 | ret = copy_partial_exec_reqs(qdev, slice, | |
1229 | pexec[i].resize - slice->offset, | |
1230 | dbc->id, head, tail); | |
1231 | else | |
1232 | ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail); | |
1233 | if (ret) { | |
1234 | bo->queued = false; | |
1235 | spin_unlock_irqrestore(&dbc->xfer_lock, flags); | |
1236 | goto failed_to_send_bo; | |
1237 | } | |
1238 | } | |
1239 | reinit_completion(&bo->xfer_done); | |
1240 | list_add_tail(&bo->xfer_list, &dbc->xfer_list); | |
1241 | spin_unlock_irqrestore(&dbc->xfer_lock, flags); | |
1242 | dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir); | |
1243 | } | |
1244 | ||
1245 | return 0; | |
1246 | ||
1247 | failed_to_send_bo: | |
1248 | if (likely(obj)) | |
1249 | drm_gem_object_put(obj); | |
1250 | for (j = 0; j < i; j++) { | |
1251 | spin_lock_irqsave(&dbc->xfer_lock, flags); | |
1252 | bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list); | |
1253 | obj = &bo->base; | |
1254 | bo->queued = false; | |
1255 | list_del(&bo->xfer_list); | |
1256 | spin_unlock_irqrestore(&dbc->xfer_lock, flags); | |
1257 | dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); | |
1258 | drm_gem_object_put(obj); | |
1259 | } | |
1260 | return ret; | |
1261 | } | |
1262 | ||
1263 | static void update_profiling_data(struct drm_file *file_priv, | |
1264 | struct qaic_execute_entry *exec, unsigned int count, | |
1265 | bool is_partial, u64 received_ts, u64 submit_ts, u32 queue_level) | |
1266 | { | |
1267 | struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec; | |
1268 | struct drm_gem_object *obj; | |
1269 | struct qaic_bo *bo; | |
1270 | int i; | |
1271 | ||
1272 | for (i = 0; i < count; i++) { | |
1273 | /* | |
1274 | * Since we already committed the BO to hardware, the only way | |
1275 | * this should fail is a pending signal. We can't cancel the | |
1276 | * submit to hardware, so we have to just skip the profiling | |
1277 | * data. In case the signal is not fatal to the process, we | |
1278 | * return success so that the user doesn't try to resubmit. | |
1279 | */ | |
1280 | obj = drm_gem_object_lookup(file_priv, | |
1281 | is_partial ? pexec[i].handle : exec[i].handle); | |
1282 | if (!obj) | |
1283 | break; | |
1284 | bo = to_qaic_bo(obj); | |
1285 | bo->perf_stats.req_received_ts = received_ts; | |
1286 | bo->perf_stats.req_submit_ts = submit_ts; | |
1287 | bo->perf_stats.queue_level_before = queue_level; | |
1288 | queue_level += bo->total_slice_nents; | |
1289 | drm_gem_object_put(obj); | |
1290 | } | |
1291 | } | |
1292 | ||
1293 | static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv, | |
1294 | bool is_partial) | |
1295 | { | |
1296 | struct qaic_partial_execute_entry *pexec; | |
1297 | struct qaic_execute *args = data; | |
1298 | struct qaic_execute_entry *exec; | |
1299 | struct dma_bridge_chan *dbc; | |
1300 | int usr_rcu_id, qdev_rcu_id; | |
1301 | struct qaic_device *qdev; | |
1302 | struct qaic_user *usr; | |
1303 | u8 __user *user_data; | |
1304 | unsigned long n; | |
1305 | u64 received_ts; | |
1306 | u32 queue_level; | |
1307 | u64 submit_ts; | |
1308 | int rcu_id; | |
1309 | u32 head; | |
1310 | u32 tail; | |
1311 | u64 size; | |
1312 | int ret; | |
1313 | ||
1314 | received_ts = ktime_get_ns(); | |
1315 | ||
1316 | size = is_partial ? sizeof(*pexec) : sizeof(*exec); | |
1317 | n = (unsigned long)size * args->hdr.count; |
1318 | if (args->hdr.count == 0 || n / args->hdr.count != size) | |
1319 | return -EINVAL; | |
1320 | ||
1321 | user_data = u64_to_user_ptr(args->data); | |
1322 | ||
1323 | exec = kcalloc(args->hdr.count, size, GFP_KERNEL); | |
1324 | pexec = (struct qaic_partial_execute_entry *)exec; | |
1325 | if (!exec) | |
1326 | return -ENOMEM; | |
1327 | ||
1328 | if (copy_from_user(exec, user_data, n)) { | |
1329 | ret = -EFAULT; | |
1330 | goto free_exec; | |
1331 | } | |
1332 | ||
1333 | usr = file_priv->driver_priv; | |
1334 | usr_rcu_id = srcu_read_lock(&usr->qddev_lock); | |
1335 | if (!usr->qddev) { | |
1336 | ret = -ENODEV; | |
1337 | goto unlock_usr_srcu; | |
1338 | } | |
1339 | ||
1340 | qdev = usr->qddev->qdev; | |
1341 | qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); | |
1342 | if (qdev->in_reset) { | |
1343 | ret = -ENODEV; | |
1344 | goto unlock_dev_srcu; | |
1345 | } | |
1346 | ||
1347 | if (args->hdr.dbc_id >= qdev->num_dbc) { | |
1348 | ret = -EINVAL; | |
1349 | goto unlock_dev_srcu; | |
1350 | } | |
1351 | ||
1352 | dbc = &qdev->dbc[args->hdr.dbc_id]; | |
1353 | ||
1354 | rcu_id = srcu_read_lock(&dbc->ch_lock); | |
1355 | if (!dbc->usr || dbc->usr->handle != usr->handle) { | |
1356 | ret = -EPERM; | |
1357 | goto release_ch_rcu; | |
1358 | } | |
1359 | ||
1360 | head = readl(dbc->dbc_base + REQHP_OFF); | |
1361 | tail = readl(dbc->dbc_base + REQTP_OFF); | |
1362 | ||
1363 | if (head == U32_MAX || tail == U32_MAX) { | |
1364 | /* PCI link error */ | |
1365 | ret = -ENODEV; | |
1366 | goto release_ch_rcu; | |
1367 | } | |
1368 | ||
1369 | queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail); | |
1370 | ||
1371 | ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc, | |
1372 | head, &tail); | |
1373 | if (ret) | |
1374 | goto release_ch_rcu; | |
1375 | ||
1376 | /* Finalize commit to hardware */ | |
1377 | submit_ts = ktime_get_ns(); | |
1378 | writel(tail, dbc->dbc_base + REQTP_OFF); | |
1379 | ||
1380 | update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts, | |
1381 | submit_ts, queue_level); | |
1382 | ||
1383 | if (datapath_polling) | |
1384 | schedule_work(&dbc->poll_work); | |
1385 | ||
1386 | release_ch_rcu: | |
1387 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1388 | unlock_dev_srcu: | |
1389 | srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); | |
1390 | unlock_usr_srcu: | |
1391 | srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); | |
1392 | free_exec: | |
1393 | kfree(exec); | |
1394 | return ret; | |
1395 | } | |
1396 | ||
1397 | int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) | |
1398 | { | |
1399 | return __qaic_execute_bo_ioctl(dev, data, file_priv, false); | |
1400 | } | |
1401 | ||
1402 | int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) | |
1403 | { | |
1404 | return __qaic_execute_bo_ioctl(dev, data, file_priv, true); | |
1405 | } | |
1406 | ||
1407 | /* | |
1408 | * Our interrupt handling is a bit more complicated than a simple ideal, but | |
1409 | * sadly necessary. | |
1410 | * | |
1411 | * Each dbc has a completion queue. Entries in the queue correspond to DMA | |
1412 | * requests which the device has processed. The hardware already has a built | |
1413 | * in irq mitigation. When the device puts an entry into the queue, it will | |
1414 | * only trigger an interrupt if the queue was empty. Therefore, when adding | |
1415 | * the Nth event to a non-empty queue, the hardware doesn't trigger an | |
1416 | * interrupt. This means the host doesn't get additional interrupts signaling | |
1417 | * the same thing - the queue has something to process. | |
1418 | * This behavior can be overridden in the DMA request. | |
1419 | * This means that when the host receives an interrupt, it is required to | |
1420 | * drain the queue. | |
1421 | * | |
1422 | * This behavior is what NAPI attempts to accomplish, although we can't use | |
1423 | * NAPI as we don't have a netdev. We use threaded irqs instead. | |
1424 | * | |
1425 | * However, there is a situation where the host drains the queue fast enough | |
1426 | * that every event causes an interrupt. Typically this is not a problem as | |
1427 | * the rate of events would be low. However, that is not the case with | |
1428 | * lprnet for example. On an Intel Xeon D-2191 where we run 8 instances of | |
1429 | * lprnet, the host receives roughly 80k interrupts per second from the device | |
1430 | * (per /proc/interrupts). While NAPI documentation indicates the host should | |
1431 | * just chug along; sadly, that behavior causes instability in some hosts. | |
1432 | * | |
1433 | * Therefore, we implement an interrupt disable scheme similar to NAPI. The | |
1434 | * key difference is that we will delay after draining the queue for a small | |
1435 | * time to allow additional events to come in via polling. Using the above | |
1436 | * lprnet workload, this reduces the number of interrupts processed from | |
1437 | * ~80k/sec to about 64 in 5 minutes and appears to solve the system | |
1438 | * instability. | |
1439 | */ | |
1440 | irqreturn_t dbc_irq_handler(int irq, void *data) | |
1441 | { | |
1442 | struct dma_bridge_chan *dbc = data; | |
1443 | int rcu_id; | |
1444 | u32 head; | |
1445 | u32 tail; | |
1446 | ||
1447 | rcu_id = srcu_read_lock(&dbc->ch_lock); | |
1448 | ||
1449 | if (!dbc->usr) { | |
1450 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1451 | return IRQ_HANDLED; | |
1452 | } | |
1453 | ||
1454 | head = readl(dbc->dbc_base + RSPHP_OFF); | |
1455 | if (head == U32_MAX) { /* PCI link error */ | |
1456 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1457 | return IRQ_NONE; | |
1458 | } | |
1459 | ||
1460 | tail = readl(dbc->dbc_base + RSPTP_OFF); | |
1461 | if (tail == U32_MAX) { /* PCI link error */ | |
1462 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1463 | return IRQ_NONE; | |
1464 | } | |
1465 | ||
1466 | if (head == tail) { /* queue empty */ | |
1467 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1468 | return IRQ_NONE; | |
1469 | } | |
1470 | ||
1471 | disable_irq_nosync(irq); | |
1472 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1473 | return IRQ_WAKE_THREAD; | |
1474 | } | |
1475 | ||
1476 | void irq_polling_work(struct work_struct *work) | |
1477 | { | |
1478 | struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work); | |
1479 | unsigned long flags; | |
1480 | int rcu_id; | |
1481 | u32 head; | |
1482 | u32 tail; | |
1483 | ||
1484 | rcu_id = srcu_read_lock(&dbc->ch_lock); | |
1485 | ||
1486 | while (1) { | |
1487 | if (dbc->qdev->in_reset) { | |
1488 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1489 | return; | |
1490 | } | |
1491 | if (!dbc->usr) { | |
1492 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1493 | return; | |
1494 | } | |
1495 | spin_lock_irqsave(&dbc->xfer_lock, flags); | |
1496 | if (list_empty(&dbc->xfer_list)) { | |
1497 | spin_unlock_irqrestore(&dbc->xfer_lock, flags); | |
1498 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1499 | return; | |
1500 | } | |
1501 | spin_unlock_irqrestore(&dbc->xfer_lock, flags); | |
1502 | ||
1503 | head = readl(dbc->dbc_base + RSPHP_OFF); | |
1504 | if (head == U32_MAX) { /* PCI link error */ | |
1505 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1506 | return; | |
1507 | } | |
1508 | ||
1509 | tail = readl(dbc->dbc_base + RSPTP_OFF); | |
1510 | if (tail == U32_MAX) { /* PCI link error */ | |
1511 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1512 | return; | |
1513 | } | |
1514 | ||
1515 | if (head != tail) { | |
1516 | irq_wake_thread(dbc->irq, dbc); | |
1517 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1518 | return; | |
1519 | } | |
1520 | ||
1521 | cond_resched(); | |
1522 | usleep_range(datapath_poll_interval_us, 2 * datapath_poll_interval_us); | |
1523 | } | |
1524 | } | |
1525 | ||
1526 | irqreturn_t dbc_irq_threaded_fn(int irq, void *data) | |
1527 | { | |
1528 | struct dma_bridge_chan *dbc = data; | |
1529 | int event_count = NUM_EVENTS; | |
1530 | int delay_count = NUM_DELAYS; | |
1531 | struct qaic_device *qdev; | |
1532 | struct qaic_bo *bo, *i; | |
1533 | struct dbc_rsp *rsp; | |
1534 | unsigned long flags; | |
1535 | int rcu_id; | |
1536 | u16 status; | |
1537 | u16 req_id; | |
1538 | u32 head; | |
1539 | u32 tail; | |
1540 | ||
1541 | rcu_id = srcu_read_lock(&dbc->ch_lock); | |
1542 | ||
1543 | head = readl(dbc->dbc_base + RSPHP_OFF); | |
1544 | if (head == U32_MAX) /* PCI link error */ | |
1545 | goto error_out; | |
1546 | ||
1547 | qdev = dbc->qdev; | |
1548 | read_fifo: | |
1549 | ||
1550 | if (!event_count) { | |
1551 | event_count = NUM_EVENTS; | |
1552 | cond_resched(); | |
1553 | } | |
1554 | ||
1555 | /* | |
1556 | * if this channel isn't assigned or gets unassigned during processing | |
1557 | * we have nothing further to do | |
1558 | */ | |
1559 | if (!dbc->usr) | |
1560 | goto error_out; | |
1561 | ||
1562 | tail = readl(dbc->dbc_base + RSPTP_OFF); | |
1563 | if (tail == U32_MAX) /* PCI link error */ | |
1564 | goto error_out; | |
1565 | ||
1566 | if (head == tail) { /* queue empty */ | |
1567 | if (delay_count) { | |
1568 | --delay_count; | |
1569 | usleep_range(100, 200); | |
1570 | goto read_fifo; /* check for a new event */ | |
1571 | } | |
1572 | goto normal_out; | |
1573 | } | |
1574 | ||
1575 | delay_count = NUM_DELAYS; | |
1576 | while (head != tail) { | |
1577 | if (!event_count) | |
1578 | break; | |
1579 | --event_count; | |
1580 | rsp = dbc->rsp_q_base + head * sizeof(*rsp); | |
1581 | req_id = le16_to_cpu(rsp->req_id); | |
1582 | status = le16_to_cpu(rsp->status); | |
1583 | if (status) | |
1584 | pci_dbg(qdev->pdev, "req_id %d failed with status %d\n", req_id, status); | |
1585 | spin_lock_irqsave(&dbc->xfer_lock, flags); | |
1586 | /* | |
1587 | * A BO can receive multiple interrupts, since a BO can be | |
1588 | * divided into multiple slices and a buffer receives as many | |
1589 | * interrupts as slices. So until it receives interrupts for | |
1590 | * all the slices we cannot mark that buffer complete. | |
1591 | */ | |
1592 | list_for_each_entry_safe(bo, i, &dbc->xfer_list, xfer_list) { | |
1593 | if (bo->req_id == req_id) | |
1594 | bo->nr_slice_xfer_done++; | |
1595 | else | |
1596 | continue; | |
1597 | ||
1598 | if (bo->nr_slice_xfer_done < bo->nr_slice) | |
1599 | break; | |
1600 | ||
1601 | /* | |
1602 | * At this point we have received all the interrupts for | |
1603 | * BO, which means BO execution is complete. | |
1604 | */ | |
1605 | dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); | |
1606 | bo->nr_slice_xfer_done = 0; | |
1607 | bo->queued = false; | |
1608 | list_del(&bo->xfer_list); | |
1609 | bo->perf_stats.req_processed_ts = ktime_get_ns(); | |
1610 | complete_all(&bo->xfer_done); | |
1611 | drm_gem_object_put(&bo->base); | |
1612 | break; | |
1613 | } | |
1614 | spin_unlock_irqrestore(&dbc->xfer_lock, flags); | |
1615 | head = (head + 1) % dbc->nelem; | |
1616 | } | |
1617 | ||
1618 | /* | |
1619 | * Update the head pointer of response queue and let the device know | |
1620 | * that we have consumed elements from the queue. | |
1621 | */ | |
1622 | writel(head, dbc->dbc_base + RSPHP_OFF); | |
1623 | ||
1624 | /* elements might have been put in the queue while we were processing */ | |
1625 | goto read_fifo; | |
1626 | ||
1627 | normal_out: | |
1628 | if (likely(!datapath_polling)) | |
1629 | enable_irq(irq); | |
1630 | else | |
1631 | schedule_work(&dbc->poll_work); | |
1632 | /* checking the FIFO then enabling IRQs is racy, so check for a missed event */ | |
1633 | tail = readl(dbc->dbc_base + RSPTP_OFF); | |
1634 | if (tail != U32_MAX && head != tail) { | |
1635 | if (likely(!datapath_polling)) | |
1636 | disable_irq_nosync(irq); | |
1637 | goto read_fifo; | |
1638 | } | |
1639 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1640 | return IRQ_HANDLED; | |
1641 | ||
1642 | error_out: | |
1643 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1644 | if (likely(!datapath_polling)) | |
1645 | enable_irq(irq); | |
1646 | else | |
1647 | schedule_work(&dbc->poll_work); | |
1648 | ||
1649 | return IRQ_HANDLED; | |
1650 | } | |
1651 | ||
1652 | int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) | |
1653 | { | |
1654 | struct qaic_wait *args = data; | |
1655 | int usr_rcu_id, qdev_rcu_id; | |
1656 | struct dma_bridge_chan *dbc; | |
1657 | struct drm_gem_object *obj; | |
1658 | struct qaic_device *qdev; | |
1659 | unsigned long timeout; | |
1660 | struct qaic_user *usr; | |
1661 | struct qaic_bo *bo; | |
1662 | int rcu_id; | |
1663 | int ret; | |
1664 | ||
d3b277b7 PRAK |
1665 | if (args->pad != 0) |
1666 | return -EINVAL; | |
1667 | ||
ff13be83 JH |
1668 | usr = file_priv->driver_priv; |
1669 | usr_rcu_id = srcu_read_lock(&usr->qddev_lock); | |
1670 | if (!usr->qddev) { | |
1671 | ret = -ENODEV; | |
1672 | goto unlock_usr_srcu; | |
1673 | } | |
1674 | ||
1675 | qdev = usr->qddev->qdev; | |
1676 | qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); | |
1677 | if (qdev->in_reset) { | |
1678 | ret = -ENODEV; | |
1679 | goto unlock_dev_srcu; | |
1680 | } | |
1681 | ||
ff13be83 JH |
1682 | if (args->dbc_id >= qdev->num_dbc) { |
1683 | ret = -EINVAL; | |
1684 | goto unlock_dev_srcu; | |
1685 | } | |
1686 | ||
1687 | dbc = &qdev->dbc[args->dbc_id]; | |
1688 | ||
1689 | rcu_id = srcu_read_lock(&dbc->ch_lock); | |
1690 | if (dbc->usr != usr) { | |
1691 | ret = -EPERM; | |
1692 | goto unlock_ch_srcu; | |
1693 | } | |
1694 | ||
1695 | obj = drm_gem_object_lookup(file_priv, args->handle); | |
1696 | if (!obj) { | |
1697 | ret = -ENOENT; | |
1698 | goto unlock_ch_srcu; | |
1699 | } | |
1700 | ||
1701 | bo = to_qaic_bo(obj); | |
1702 | timeout = args->timeout ? args->timeout : wait_exec_default_timeout_ms; | |
1703 | timeout = msecs_to_jiffies(timeout); | |
1704 | ret = wait_for_completion_interruptible_timeout(&bo->xfer_done, timeout); | |
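| /* 0 == timed out, < 0 == interrupted by a signal, > 0 == jiffies left */ |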
1705 | if (!ret) { | |
1706 | ret = -ETIMEDOUT; | |
1707 | goto put_obj; | |
1708 | } | |
1709 | if (ret > 0) | |
1710 | ret = 0; | |
1711 | ||
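| /* |
| * The wait may have been satisfied because the channel was torn down |
| * (wakeup_dbc()/release_dbc() force-complete queued BOs), so if the DBC |
| * no longer has a user, report -EPERM rather than success. |
| */ |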
1712 | if (!dbc->usr) | |
1713 | ret = -EPERM; | |
1714 | ||
1715 | put_obj: | |
1716 | drm_gem_object_put(obj); | |
1717 | unlock_ch_srcu: | |
1718 | srcu_read_unlock(&dbc->ch_lock, rcu_id); | |
1719 | unlock_dev_srcu: | |
1720 | srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); | |
1721 | unlock_usr_srcu: | |
1722 | srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); | |
1723 | return ret; | |
1724 | } | |
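| /* |
| * A minimal userspace sketch, assuming struct qaic_wait and |
| * DRM_IOCTL_QAIC_WAIT_BO from uapi/drm/qaic_accel.h: |
| * |
| *	struct qaic_wait w = { .handle = bo_handle, .dbc_id = id, .timeout = 0, .pad = 0 }; |
| *	ret = ioctl(fd, DRM_IOCTL_QAIC_WAIT_BO, &w); |
| * |
| * A timeout of 0 selects wait_exec_default_timeout_ms; pad must be zero. |
| */ |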
1725 | ||
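| /* |
| * Report per-BO execution statistics. The caller passes an array of |
| * qaic_perf_stats_entry (hdr.count entries) identifying BOs by GEM handle; |
| * the driver fills in the latency and queue-depth fields and copies the |
| * array back to userspace. |
| */ |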
1726 | int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) | |
1727 | { | |
1728 | struct qaic_perf_stats_entry *ent = NULL; | |
1729 | struct qaic_perf_stats *args = data; | |
1730 | int usr_rcu_id, qdev_rcu_id; | |
1731 | struct drm_gem_object *obj; | |
1732 | struct qaic_device *qdev; | |
1733 | struct qaic_user *usr; | |
1734 | struct qaic_bo *bo; | |
1735 | int ret, i; | |
1736 | ||
1737 | usr = file_priv->driver_priv; | |
1738 | usr_rcu_id = srcu_read_lock(&usr->qddev_lock); | |
1739 | if (!usr->qddev) { | |
1740 | ret = -ENODEV; | |
1741 | goto unlock_usr_srcu; | |
1742 | } | |
1743 | ||
1744 | qdev = usr->qddev->qdev; | |
1745 | qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); | |
1746 | if (qdev->in_reset) { | |
1747 | ret = -ENODEV; | |
1748 | goto unlock_dev_srcu; | |
1749 | } | |
1750 | ||
1751 | if (args->hdr.dbc_id >= qdev->num_dbc) { | |
1752 | ret = -EINVAL; | |
1753 | goto unlock_dev_srcu; | |
1754 | } | |
1755 | ||
1756 | ent = kcalloc(args->hdr.count, sizeof(*ent), GFP_KERNEL); | |
1757 | if (!ent) { | |
1758 | ret = -ENOMEM; |
1759 | goto unlock_dev_srcu; | |
1760 | } | |
1761 | ||
1762 | ret = copy_from_user(ent, u64_to_user_ptr(args->data), args->hdr.count * sizeof(*ent)); | |
1763 | if (ret) { | |
1764 | ret = -EFAULT; | |
1765 | goto free_ent; | |
1766 | } | |
1767 | ||
1768 | for (i = 0; i < args->hdr.count; i++) { | |
1769 | obj = drm_gem_object_lookup(file_priv, ent[i].handle); | |
1770 | if (!obj) { | |
1771 | ret = -ENOENT; | |
1772 | goto free_ent; | |
1773 | } | |
1774 | bo = to_qaic_bo(obj); | |
1775 | /* | |
1776 | * If the perf stats ioctl is called before the wait ioctl has completed, |
1777 | * then the latency information is invalid. |
1778 | */ | |
1779 | if (bo->perf_stats.req_processed_ts < bo->perf_stats.req_submit_ts) { | |
1780 | ent[i].device_latency_us = 0; | |
1781 | } else { | |
1782 | ent[i].device_latency_us = div_u64((bo->perf_stats.req_processed_ts - | |
1783 | bo->perf_stats.req_submit_ts), 1000); | |
1784 | } | |
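| /* |
| * Submit latency: req_submit_ts - req_received_ts, i.e. the time between |
| * the request being received by the driver and being submitted to the |
| * queue, converted from nanoseconds to microseconds. |
| */ |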
1785 | ent[i].submit_latency_us = div_u64((bo->perf_stats.req_submit_ts - | |
1786 | bo->perf_stats.req_received_ts), 1000); | |
1787 | ent[i].queue_level_before = bo->perf_stats.queue_level_before; | |
1788 | ent[i].num_queue_element = bo->total_slice_nents; | |
1789 | drm_gem_object_put(obj); | |
1790 | } | |
1791 | ||
1792 | if (copy_to_user(u64_to_user_ptr(args->data), ent, args->hdr.count * sizeof(*ent))) | |
1793 | ret = -EFAULT; | |
1794 | ||
1795 | free_ent: | |
1796 | kfree(ent); | |
1797 | unlock_dev_srcu: | |
1798 | srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); | |
1799 | unlock_usr_srcu: | |
1800 | srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); | |
1801 | return ret; | |
1802 | } | |
1803 | ||
1804 | static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc) | |
1805 | { | |
1806 | unsigned long flags; | |
1807 | struct qaic_bo *bo; | |
1808 | ||
1809 | spin_lock_irqsave(&dbc->xfer_lock, flags); | |
1810 | while (!list_empty(&dbc->xfer_list)) { | |
1811 | bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list); | |
1812 | bo->queued = false; | |
1813 | list_del(&bo->xfer_list); | |
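| /* |
| * Drop the lock before syncing and putting the BO: drm_gem_object_put() |
| * may free the object, which is best not done under a spinlock with |
| * interrupts disabled. Reacquire before examining the list head again. |
| */ |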
1814 | spin_unlock_irqrestore(&dbc->xfer_lock, flags); | |
1815 | dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); | |
1816 | complete_all(&bo->xfer_done); | |
1817 | drm_gem_object_put(&bo->base); | |
1818 | spin_lock_irqsave(&dbc->xfer_lock, flags); | |
1819 | } | |
1820 | spin_unlock_irqrestore(&dbc->xfer_lock, flags); | |
1821 | } | |
1822 | ||
1823 | int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr) | |
1824 | { | |
1825 | if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle) | |
1826 | return -EPERM; | |
1827 | ||
1828 | qdev->dbc[dbc_id].usr = NULL; | |
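| /* |
| * Wait for all SRCU readers of ch_lock (the datapath handler and the |
| * ioctls above take it) to drain, so that nothing can still be operating |
| * on this channel on behalf of the old user once we return. |
| */ |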
1829 | synchronize_srcu(&qdev->dbc[dbc_id].ch_lock); | |
1830 | return 0; | |
1831 | } | |
1832 | ||
1833 | /** | |
1834 | * enable_dbc - Enable the DBC. DBCs are disabled by removing the user's |
1835 | * context from them. Adding the user context back to the DBC enables it. This |
1836 | * function trusts the DBC ID passed in and expects the DBC to be disabled. |
1837 | * @qdev: Qranium device handle | |
1838 | * @dbc_id: ID of the DBC | |
1839 | * @usr: User context | |
1840 | */ | |
1841 | void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr) | |
1842 | { | |
1843 | qdev->dbc[dbc_id].usr = usr; | |
1844 | } | |
1845 | ||
1846 | void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id) | |
1847 | { | |
1848 | struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id]; | |
1849 | ||
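| /* |
| * Detach the channel from its user and force-complete everything already |
| * queued so that waiters in qaic_wait_bo_ioctl() wake up. |
| */ |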
1850 | dbc->usr = NULL; | |
1851 | empty_xfer_list(qdev, dbc); | |
1852 | synchronize_srcu(&dbc->ch_lock); | |
faa7c4ee PRAK |
1853 | /* |
1854 | * Threads holding the channel lock may have added more elements to the |
1855 | * xfer_list. Flush those elements out as well. |
1856 | */ | |
1857 | empty_xfer_list(qdev, dbc); | |
ff13be83 JH |
1858 | } |
1859 | ||
1860 | void release_dbc(struct qaic_device *qdev, u32 dbc_id) | |
1861 | { | |
1862 | struct bo_slice *slice, *slice_temp; | |
1863 | struct qaic_bo *bo, *bo_temp; | |
1864 | struct dma_bridge_chan *dbc; | |
1865 | ||
1866 | dbc = &qdev->dbc[dbc_id]; | |
1867 | if (!dbc->in_use) | |
1868 | return; | |
1869 | ||
1870 | wakeup_dbc(qdev, dbc_id); | |
1871 | ||
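| /* |
| * Free the queue memory. req_q_base appears to be the base of a single |
| * coherent allocation of total_size bytes backing both the request and |
| * response rings (an assumption; the allocation path is earlier in this |
| * file and not shown here). |
| */ |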
1872 | dma_free_coherent(&qdev->pdev->dev, dbc->total_size, dbc->req_q_base, dbc->dma_addr); | |
1873 | dbc->total_size = 0; | |
1874 | dbc->req_q_base = NULL; | |
1875 | dbc->dma_addr = 0; | |
1876 | dbc->nelem = 0; | |
1877 | dbc->usr = NULL; | |
1878 | ||
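| /* |
| * Return every BO that was sliced against this channel to its unsliced |
| * state so it can be sliced again later, and complete xfer_done so that |
| * nothing is left waiting on a dead channel. |
| */ |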
1879 | list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) { | |
1880 | list_for_each_entry_safe(slice, slice_temp, &bo->slices, slice) | |
1881 | kref_put(&slice->ref_count, free_slice); | |
1882 | bo->sliced = false; | |
1883 | INIT_LIST_HEAD(&bo->slices); | |
1884 | bo->total_slice_nents = 0; | |
1885 | bo->dir = 0; | |
1886 | bo->dbc = NULL; | |
1887 | bo->nr_slice = 0; | |
1888 | bo->nr_slice_xfer_done = 0; | |
1889 | bo->queued = false; | |
1890 | bo->req_id = 0; | |
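| /* |
| * Reinitialize the completion to clear any stale state, then complete it |
| * so that current and future waiters on this BO return instead of |
| * hanging on a released channel. |
| */ |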
1891 | init_completion(&bo->xfer_done); | |
1892 | complete_all(&bo->xfer_done); | |
1893 | list_del(&bo->bo_list); | |
1894 | bo->perf_stats.req_received_ts = 0; | |
1895 | bo->perf_stats.req_submit_ts = 0; | |
1896 | bo->perf_stats.req_processed_ts = 0; | |
1897 | bo->perf_stats.queue_level_before = 0; | |
1898 | } | |
1899 | ||
1900 | dbc->in_use = false; | |
1901 | wake_up(&dbc->dbc_release); | |
1902 | } |