// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

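/*
 * Allocate a job and carve all of its per-job arrays (relocs, unpin data,
 * commands and DMA addresses) out of a single allocation, so that one
 * kzalloc()/kfree() pair covers everything.
 *
 * A rough sketch of the intended call sequence from a client's point of
 * view (submission details vary by driver):
 *
 *	job = host1x_job_alloc(channel, num_cmdbufs, num_relocs, false);
 *	host1x_job_add_gather(job, bo, words, offset);
 *	err = host1x_job_pin(job, client->dev);
 *	...submit, then drop the reference once the job is done...
 *	host1x_job_put(job);
 */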
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
                                    u32 num_cmdbufs, u32 num_relocs,
                                    bool skip_firewall)
{
        struct host1x_job *job = NULL;
        unsigned int num_unpins = num_relocs;
        bool enable_firewall;
        u64 total;
        void *mem;

        enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;

        if (!enable_firewall)
                num_unpins += num_cmdbufs;

        /* Check that we're not going to overflow */
        total = sizeof(struct host1x_job) +
                (u64)num_relocs * sizeof(struct host1x_reloc) +
                (u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
                (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
                (u64)num_unpins * sizeof(dma_addr_t) +
                (u64)num_unpins * sizeof(u32 *);
        if (total > ULONG_MAX)
                return NULL;

        mem = job = kzalloc(total, GFP_KERNEL);
        if (!job)
                return NULL;

        job->enable_firewall = enable_firewall;

        kref_init(&job->ref);
        job->channel = ch;

        /* Carve the single allocation up into the individual arrays */
        mem += sizeof(struct host1x_job);
        job->relocs = num_relocs ? mem : NULL;
        mem += num_relocs * sizeof(struct host1x_reloc);
        job->unpins = num_unpins ? mem : NULL;
        mem += num_unpins * sizeof(struct host1x_job_unpin_data);
        job->cmds = num_cmdbufs ? mem : NULL;
        mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
        job->addr_phys = num_unpins ? mem : NULL;

        job->reloc_addr_phys = job->addr_phys;
        job->gather_addr_phys = &job->addr_phys[num_relocs];

        return job;
}
EXPORT_SYMBOL(host1x_job_alloc);

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
        kref_get(&job->ref);
        return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
        struct host1x_job *job = container_of(ref, struct host1x_job, ref);

        if (job->release)
                job->release(job);

        if (job->waiter)
                host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
                                    job->waiter, false);

        if (job->syncpt)
                host1x_syncpt_put(job->syncpt);

        kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
        kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

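/*
 * Append a gather (a buffer of opcodes for the engine to execute) to the
 * job's command list. Note that there is no bounds check here: the caller
 * must have sized the job for enough command slots in host1x_job_alloc().
 */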
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
                           unsigned int words, unsigned int offset)
{
        struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;

        gather->words = words;
        gather->bo = bo;
        gather->offset = offset;

        job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

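/*
 * Append a syncpoint wait to the job's command list. The threshold is
 * either absolute or, if @relative is set, relative to the job's start;
 * @next_class names the class that command processing continues with
 * once the wait has completed.
 */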
void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
                         bool relative, u32 next_class)
{
        struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];

        cmd->is_wait = true;
        cmd->wait.id = id;
        cmd->wait.threshold = thresh;
        cmd->wait.next_class = next_class;
        cmd->wait.relative = relative;

        job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_wait);

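/*
 * Pin the DMA mappings that the job needs: first every relocation target,
 * then (unless the firewall will make copies anyway) every gather buffer.
 * Each successful pin is recorded in job->unpins so that
 * host1x_job_unpin() can undo it on failure or completion.
 */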
static int pin_job(struct host1x *host, struct host1x_job *job)
{
        unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
        struct host1x_client *client = job->client;
        struct device *dev = client->dev;
        struct host1x_job_gather *g;
        unsigned int i;
        int err;

        job->num_unpins = 0;

        for (i = 0; i < job->num_relocs; i++) {
                struct host1x_reloc *reloc = &job->relocs[i];
                enum dma_data_direction direction;
                struct host1x_bo_mapping *map;
                struct host1x_bo *bo;

                reloc->target.bo = host1x_bo_get(reloc->target.bo);
                if (!reloc->target.bo) {
                        err = -EINVAL;
                        goto unpin;
                }

                bo = reloc->target.bo;

                switch (reloc->flags & mask) {
                case HOST1X_RELOC_READ:
                        direction = DMA_TO_DEVICE;
                        break;

                case HOST1X_RELOC_WRITE:
                        direction = DMA_FROM_DEVICE;
                        break;

                case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
                        direction = DMA_BIDIRECTIONAL;
                        break;

                default:
                        err = -EINVAL;
                        goto unpin;
                }

                map = host1x_bo_pin(dev, bo, direction, NULL);
                if (IS_ERR(map)) {
                        err = PTR_ERR(map);
                        goto unpin;
                }

                /*
                 * host1x clients are generally not able to do scatter-gather
                 * themselves, so fail if the buffer is discontiguous and we
                 * fail to map its SG table to a single contiguous chunk of
                 * I/O virtual memory.
                 */
                if (map->chunks > 1) {
                        err = -EINVAL;
                        goto unpin;
                }

                job->addr_phys[job->num_unpins] = map->phys;
                job->unpins[job->num_unpins].map = map;
                job->num_unpins++;
        }

        /*
         * With the firewall enabled, the contents of the gather BOs are
         * copied later, so there is no need to hold references to them or
         * to pin them here.
         */
        if (job->enable_firewall)
                return 0;

        for (i = 0; i < job->num_cmds; i++) {
                struct host1x_bo_mapping *map;
                size_t gather_size = 0;
                struct scatterlist *sg;
                unsigned long shift;
                struct iova *alloc;
                unsigned int j;

                if (job->cmds[i].is_wait)
                        continue;

                g = &job->cmds[i].gather;

                g->bo = host1x_bo_get(g->bo);
                if (!g->bo) {
                        err = -EINVAL;
                        goto unpin;
                }

                map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
                if (IS_ERR(map)) {
                        err = PTR_ERR(map);
                        goto unpin;
                }

                if (host->domain) {
                        for_each_sgtable_sg(map->sgt, sg, j)
                                gather_size += sg->length;

                        gather_size = iova_align(&host->iova, gather_size);

                        shift = iova_shift(&host->iova);
                        alloc = alloc_iova(&host->iova, gather_size >> shift,
                                           host->iova_end >> shift, true);
                        if (!alloc) {
                                err = -ENOMEM;
                                goto put;
                        }

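                        /*
                         * iommu_map_sgtable() returns the number of bytes
                         * mapped, so a return value of zero means nothing
                         * was mapped and the gather cannot be used.
                         */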
                        err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
                                                map->sgt, IOMMU_READ);
                        if (err == 0) {
                                __free_iova(&host->iova, alloc);
                                err = -EINVAL;
                                goto put;
                        }

                        map->phys = iova_dma_addr(&host->iova, alloc);
                        map->size = gather_size;
                }

                job->addr_phys[job->num_unpins] = map->phys;
                job->unpins[job->num_unpins].map = map;
                job->num_unpins++;

                job->gather_addr_phys[i] = map->phys;
        }

        return 0;

put:
        host1x_bo_put(g->bo);
unpin:
        host1x_job_unpin(job);
        return err;
}

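/*
 * Patch every relocation that targets the given gather's command buffer,
 * writing the pinned DMA address (shifted as requested) over the
 * placeholder word. With the firewall enabled, the patch is applied to
 * the gather copy instead of the original buffer.
 */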
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
        void *cmdbuf_addr = NULL;
        struct host1x_bo *cmdbuf = g->bo;
        unsigned int i;

        /* patch the relocs that apply to this gather */
        for (i = 0; i < job->num_relocs; i++) {
                struct host1x_reloc *reloc = &job->relocs[i];
                u32 reloc_addr = (job->reloc_addr_phys[i] +
                                  reloc->target.offset) >> reloc->shift;
                u32 *target;

                /* skip all other gathers */
                if (cmdbuf != reloc->cmdbuf.bo)
                        continue;

                if (job->enable_firewall) {
                        target = (u32 *)job->gather_copy_mapped +
                                        reloc->cmdbuf.offset / sizeof(u32) +
                                                g->offset / sizeof(u32);
                        goto patch_reloc;
                }

                if (!cmdbuf_addr) {
                        cmdbuf_addr = host1x_bo_mmap(cmdbuf);

                        if (unlikely(!cmdbuf_addr)) {
                                pr_err("Could not map cmdbuf for relocation\n");
                                return -ENOMEM;
                        }
                }

                target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
                *target = reloc_addr;
        }

        if (cmdbuf_addr)
                host1x_bo_munmap(cmdbuf, cmdbuf_addr);

        return 0;
}

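/*
 * Check that the next expected relocation matches the address register
 * write the firewall just encountered: same command buffer, same word
 * offset, and (for now) a shift of zero.
 */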
static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
                        unsigned int offset)
{
        offset *= sizeof(u32);

        if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
                return false;

        /* relocation shift value validation isn't implemented yet */
        if (reloc->shift)
                return false;

        return true;
}

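/*
 * Parser state for the software firewall: the position within the gather
 * being scanned, the list of relocations left to match, and the decoded
 * fields (class, register, mask, count) of the current opcode.
 */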
struct host1x_firewall {
        struct host1x_job *job;
        struct device *dev;

        unsigned int num_relocs;
        struct host1x_reloc *reloc;

        struct host1x_bo *cmdbuf;
        unsigned int offset;

        u32 words;
        u32 class;
        u32 reg;
        u32 mask;
        u32 count;
};

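/*
 * If the client flags this register as carrying a buffer address, require
 * that the write is backed by the next pending relocation; writes to
 * plain data registers pass through unchecked.
 */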
static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
        if (!fw->job->is_addr_reg)
                return 0;

        if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
                if (!fw->num_relocs)
                        return -EINVAL;

                if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
                        return -EINVAL;

                fw->num_relocs--;
                fw->reloc++;
        }

        return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
        if (!fw->job->is_valid_class) {
                if (fw->class != class)
                        return -EINVAL;
        } else {
                if (!fw->job->is_valid_class(fw->class))
                        return -EINVAL;
        }

        return 0;
}

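/*
 * A mask write carries one data word per bit set in the mask; bit n
 * targets register fw->reg + n. Validate each targeted register in turn.
 */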
static int check_mask(struct host1x_firewall *fw)
{
        u32 mask = fw->mask;
        u32 reg = fw->reg;
        int ret;

        while (mask) {
                if (fw->words == 0)
                        return -EINVAL;

                if (mask & 1) {
                        ret = check_register(fw, reg);
                        if (ret < 0)
                                return ret;

                        fw->words--;
                        fw->offset++;
                }
                mask >>= 1;
                reg++;
        }

        return 0;
}

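/*
 * An incrementing write carries fw->count data words for consecutive
 * registers starting at fw->reg; the non-incrementing variant below
 * writes all fw->count words to the same register.
 */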
static int check_incr(struct host1x_firewall *fw)
{
        u32 count = fw->count;
        u32 reg = fw->reg;
        int ret;

        while (count) {
                if (fw->words == 0)
                        return -EINVAL;

                ret = check_register(fw, reg);
                if (ret < 0)
                        return ret;

                reg++;
                fw->words--;
                fw->offset++;
                count--;
        }

        return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
        u32 count = fw->count;
        int ret;

        while (count) {
                if (fw->words == 0)
                        return -EINVAL;

                ret = check_register(fw, fw->reg);
                if (ret < 0)
                        return ret;

                fw->words--;
                fw->offset++;
                count--;
        }

        return 0;
}

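/*
 * Walk one gather word by word and validate every opcode. The opcode is
 * encoded in the top four bits of each word; the cases below handle
 * setclass (0), incrementing (1) and non-incrementing (2) writes, and
 * mask writes (3). Opcodes 4 and 14 (the immediate-data and extended
 * opcodes on this hardware) consume no following data words, so there is
 * nothing in them for the firewall to check.
 */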
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
        u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
                (g->offset / sizeof(u32));
        u32 job_class = fw->class;
        int err = 0;

        fw->words = g->words;
        fw->cmdbuf = g->bo;
        fw->offset = 0;

        while (fw->words && !err) {
                u32 word = cmdbuf_base[fw->offset];
                u32 opcode = (word & 0xf0000000) >> 28;

                fw->mask = 0;
                fw->reg = 0;
                fw->count = 0;
                fw->words--;
                fw->offset++;

                switch (opcode) {
                case 0:
                        fw->class = word >> 6 & 0x3ff;
                        fw->mask = word & 0x3f;
                        fw->reg = word >> 16 & 0xfff;
                        err = check_class(fw, job_class);
                        if (!err)
                                err = check_mask(fw);
                        if (err)
                                goto out;
                        break;

                case 1:
                        fw->reg = word >> 16 & 0xfff;
                        fw->count = word & 0xffff;
                        err = check_incr(fw);
                        if (err)
                                goto out;
                        break;

                case 2:
                        fw->reg = word >> 16 & 0xfff;
                        fw->count = word & 0xffff;
                        err = check_nonincr(fw);
                        if (err)
                                goto out;
                        break;

                case 3:
                        fw->mask = word & 0xffff;
                        fw->reg = word >> 16 & 0xfff;
                        err = check_mask(fw);
                        if (err)
                                goto out;
                        break;

                case 4:
                case 14:
                        break;

                default:
                        err = -EINVAL;
                        break;
                }
        }

out:
        return err;
}

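/*
 * Consolidate all of the job's gathers into one device-visible buffer,
 * running the firewall over each copy as it is made. Patching and
 * submission then operate on the copies, so userspace cannot modify a
 * gather after it has been validated.
 */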
static inline int copy_gathers(struct device *host, struct host1x_job *job,
                               struct device *dev)
{
        struct host1x_firewall fw;
        size_t size = 0;
        size_t offset = 0;
        unsigned int i;

        fw.job = job;
        fw.dev = dev;
        fw.reloc = job->relocs;
        fw.num_relocs = job->num_relocs;
        fw.class = job->class;

        for (i = 0; i < job->num_cmds; i++) {
                struct host1x_job_gather *g;

                if (job->cmds[i].is_wait)
                        continue;

                g = &job->cmds[i].gather;

                size += g->words * sizeof(u32);
        }

        /*
         * Try a non-blocking allocation from the higher-priority pools
         * first, since waiting for an allocation here would be a major
         * performance hit.
         */
        job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
                                               GFP_NOWAIT);

        /* the non-blocking allocation failed, fall back to a blocking one */
        if (!job->gather_copy_mapped)
                job->gather_copy_mapped = dma_alloc_wc(host, size,
                                                       &job->gather_copy,
                                                       GFP_KERNEL);
        if (!job->gather_copy_mapped)
                return -ENOMEM;

        job->gather_copy_size = size;

        for (i = 0; i < job->num_cmds; i++) {
                struct host1x_job_gather *g;
                void *gather;

                if (job->cmds[i].is_wait)
                        continue;
                g = &job->cmds[i].gather;

                /* Copy the gather */
                gather = host1x_bo_mmap(g->bo);
                memcpy(job->gather_copy_mapped + offset, gather + g->offset,
                       g->words * sizeof(u32));
                host1x_bo_munmap(g->bo, gather);

                /* Store the location in the buffer */
                g->base = job->gather_copy;
                g->offset = offset;

                /* Validate the job */
                if (validate(&fw, g))
                        return -EINVAL;

                offset += g->words * sizeof(u32);
        }

        /* No relocs should remain at this point */
        if (fw.num_relocs)
                return -EINVAL;

        return 0;
}

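/*
 * Prepare a job for submission: pin all buffers, run the firewall over
 * the gathers if it is enabled, and patch the relocations into the
 * (possibly copied) gathers.
 */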
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
        int err;
        unsigned int i, j;
        struct host1x *host = dev_get_drvdata(dev->parent);

        /* pin memory */
        err = pin_job(host, job);
        if (err)
                goto out;

        if (job->enable_firewall) {
                err = copy_gathers(host->dev, job, dev);
                if (err)
                        goto out;
        }

        /* patch gathers */
        for (i = 0; i < job->num_cmds; i++) {
                struct host1x_job_gather *g;

                if (job->cmds[i].is_wait)
                        continue;
                g = &job->cmds[i].gather;

                /* process each gather BO only once */
                if (g->handled)
                        continue;

                /* copy_gathers() already set the gather's base if the firewall is enabled */
                if (!job->enable_firewall)
                        g->base = job->gather_addr_phys[i];

                for (j = i + 1; j < job->num_cmds; j++) {
                        if (!job->cmds[j].is_wait &&
                            job->cmds[j].gather.bo == g->bo) {
                                job->cmds[j].gather.handled = true;
                                job->cmds[j].gather.base = g->base;
                        }
                }

                err = do_relocs(job, g);
                if (err)
                        break;
        }

out:
        if (err)
                host1x_job_unpin(job);
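        /* ensure the patched gather contents are visible before submission */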
        wmb();

        return err;
}
EXPORT_SYMBOL(host1x_job_pin);

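/*
 * Undo everything that pin_job() and copy_gathers() set up: tear down any
 * IOMMU mappings created for the gathers, unpin and release every buffer,
 * and free the firewall's gather copy.
 */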
void host1x_job_unpin(struct host1x_job *job)
{
        struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
        unsigned int i;

        for (i = 0; i < job->num_unpins; i++) {
                struct host1x_bo_mapping *map = job->unpins[i].map;
                struct host1x_bo *bo = map->bo;

                if (!job->enable_firewall && map->size && host->domain) {
                        iommu_unmap(host->domain, job->addr_phys[i], map->size);
                        free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
                }

                host1x_bo_unpin(map);
                host1x_bo_put(bo);
        }

        job->num_unpins = 0;

        if (job->gather_copy_size)
                dma_free_wc(host->dev, job->gather_copy_size,
                            job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
        dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt->id);
        dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
        dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
        dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
        dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
        dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}