amdkfd: use schedule() in sync_with_hw
linux-block.git: drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include <linux/slab.h>
25#include <linux/list.h>
26#include <linux/types.h>
27#include <linux/printk.h>
28#include <linux/bitops.h>
29#include "kfd_priv.h"
30#include "kfd_device_queue_manager.h"
31#include "kfd_mqd_manager.h"
32#include "cik_regs.h"
33#include "kfd_kernel_queue.h"
34#include "../../radeon/cik_reg.h"
35
36/* Size of the per-pipe EOP queue */
37#define CIK_HPD_EOP_BYTES_LOG2 11
38#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
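/* 1 << 11 = 2048 bytes of EOP buffer reserved per pipe */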
39
40static bool is_mem_initialized;
41
42static int init_memory(struct device_queue_manager *dqm);
43static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
44 unsigned int pasid, unsigned int vmid);
45
46static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
47 struct queue *q,
48 struct qcm_process_device *qpd);
49static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
50static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
51
52
53static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
54{
55 BUG_ON(!dqm || !dqm->dev);
56 return dqm->dev->shared_resources.compute_pipe_count;
57}
58
59static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
60{
61 BUG_ON(!dqm);
62 return dqm->dev->shared_resources.first_compute_pipe;
63}
64
65static inline unsigned int get_pipes_num_cpsch(void)
66{
67 return PIPE_PER_ME_CP_SCHEDULING;
68}
69
70static unsigned int get_sh_mem_bases_nybble_64(struct kfd_process *process,
71 struct kfd_dev *dev)
72{
73 struct kfd_process_device *pdd;
74 uint32_t nybble;
75
76 pdd = kfd_get_process_device_data(dev, process, 1);
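	/*
	 * Top nybble (bits 63:60) of the LDS aperture base; the 0x0E mask
	 * clears bit 0, which compute_sh_mem_bases_64bit() requires to be 0.
	 */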
77 nybble = (pdd->lds_base >> 60) & 0x0E;
78
79 return nybble;
80
81}
82
83static unsigned int get_sh_mem_bases_32(struct kfd_process *process,
84 struct kfd_dev *dev)
85{
86 struct kfd_process_device *pdd;
87 unsigned int shared_base;
88
89 pdd = kfd_get_process_device_data(dev, process, 1);
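	/* For 32-bit processes the shared base is bits 23:16 of lds_base. */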
90 shared_base = (pdd->lds_base >> 16) & 0xFF;
91
92 return shared_base;
93}
94
95static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
96static void init_process_memory(struct device_queue_manager *dqm,
97 struct qcm_process_device *qpd)
98{
99 unsigned int temp;
100
101 BUG_ON(!dqm || !qpd);
102
103 /* check if sh_mem_config register already configured */
104 if (qpd->sh_mem_config == 0) {
105 qpd->sh_mem_config =
106 ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
107 DEFAULT_MTYPE(MTYPE_NONCACHED) |
108 APE1_MTYPE(MTYPE_NONCACHED);
109 qpd->sh_mem_ape1_limit = 0;
110 qpd->sh_mem_ape1_base = 0;
111 }
112
113 if (qpd->pqm->process->is_32bit_user_mode) {
114 temp = get_sh_mem_bases_32(qpd->pqm->process, dqm->dev);
115 qpd->sh_mem_bases = SHARED_BASE(temp);
116 qpd->sh_mem_config |= PTR32;
117 } else {
118 temp = get_sh_mem_bases_nybble_64(qpd->pqm->process, dqm->dev);
119 qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
120 }
121
122 pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
123 qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
124}
125
126static void program_sh_mem_settings(struct device_queue_manager *dqm,
127 struct qcm_process_device *qpd)
128{
129 return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
130 qpd->sh_mem_config,
131 qpd->sh_mem_ape1_base,
132 qpd->sh_mem_ape1_limit,
133 qpd->sh_mem_bases);
134}
135
136static int allocate_vmid(struct device_queue_manager *dqm,
137 struct qcm_process_device *qpd,
138 struct queue *q)
139{
140 int bit, allocated_vmid;
141
142 if (dqm->vmid_bitmap == 0)
143 return -ENOMEM;
144
145 bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
146 clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
147
 148	/* Kaveri kfd VMIDs start from VMID 8 */
149 allocated_vmid = bit + KFD_VMID_START_OFFSET;
150 pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
151 qpd->vmid = allocated_vmid;
152 q->properties.vmid = allocated_vmid;
153
154 set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
155 program_sh_mem_settings(dqm, qpd);
156
157 return 0;
158}
159
160static void deallocate_vmid(struct device_queue_manager *dqm,
161 struct qcm_process_device *qpd,
162 struct queue *q)
163{
164 int bit = qpd->vmid - KFD_VMID_START_OFFSET;
165
166 set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
167 qpd->vmid = 0;
168 q->properties.vmid = 0;
169}
170
171static int create_queue_nocpsch(struct device_queue_manager *dqm,
172 struct queue *q,
173 struct qcm_process_device *qpd,
174 int *allocated_vmid)
175{
176 int retval;
177
178 BUG_ON(!dqm || !q || !qpd || !allocated_vmid);
179
180 pr_debug("kfd: In func %s\n", __func__);
181 print_queue(q);
182
183 mutex_lock(&dqm->lock);
184
185 if (list_empty(&qpd->queues_list)) {
186 retval = allocate_vmid(dqm, qpd, q);
187 if (retval != 0) {
188 mutex_unlock(&dqm->lock);
189 return retval;
190 }
191 }
192 *allocated_vmid = qpd->vmid;
193 q->properties.vmid = qpd->vmid;
194
195 retval = create_compute_queue_nocpsch(dqm, q, qpd);
196
197 if (retval != 0) {
198 if (list_empty(&qpd->queues_list)) {
199 deallocate_vmid(dqm, qpd, q);
200 *allocated_vmid = 0;
201 }
202 mutex_unlock(&dqm->lock);
203 return retval;
204 }
205
206 list_add(&q->list, &qpd->queues_list);
207 dqm->queue_count++;
208
209 mutex_unlock(&dqm->lock);
210 return 0;
211}
212
213static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
214{
215 bool set;
216 int pipe, bit;
217
218 set = false;
219
220 for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
221 pipe = (pipe + 1) % get_pipes_num(dqm)) {
222 if (dqm->allocated_queues[pipe] != 0) {
223 bit = find_first_bit(
224 (unsigned long *)&dqm->allocated_queues[pipe],
225 QUEUES_PER_PIPE);
226
227 clear_bit(bit,
228 (unsigned long *)&dqm->allocated_queues[pipe]);
229 q->pipe = pipe;
230 q->queue = bit;
231 set = true;
232 break;
233 }
234 }
235
236 if (set == false)
237 return -EBUSY;
238
239 pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
240 __func__, q->pipe, q->queue);
241 /* horizontal hqd allocation */
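	/*
	 * Round-robin: the next allocation starts on the following pipe so
	 * queues spread across the pipes.
	 */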
242 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);
243
244 return 0;
245}
246
247static inline void deallocate_hqd(struct device_queue_manager *dqm,
248 struct queue *q)
249{
250 set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
251}
252
253static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
254 struct queue *q,
255 struct qcm_process_device *qpd)
256{
257 int retval;
258 struct mqd_manager *mqd;
259
260 BUG_ON(!dqm || !q || !qpd);
261
262 mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
263 if (mqd == NULL)
264 return -ENOMEM;
265
266 retval = allocate_hqd(dqm, q);
267 if (retval != 0)
268 return retval;
269
270 retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
271 &q->gart_mqd_addr, &q->properties);
272 if (retval != 0) {
273 deallocate_hqd(dqm, q);
274 return retval;
275 }
276
277 return 0;
278}
279
280static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
281 struct qcm_process_device *qpd,
282 struct queue *q)
283{
284 int retval;
285 struct mqd_manager *mqd;
286
287 BUG_ON(!dqm || !q || !q->mqd || !qpd);
288
289 retval = 0;
290
 291	pr_debug("kfd: In func %s\n", __func__);
292
293 mutex_lock(&dqm->lock);
294 mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
295 if (mqd == NULL) {
296 retval = -ENOMEM;
297 goto out;
298 }
299
300 retval = mqd->destroy_mqd(mqd, q->mqd,
301 KFD_PREEMPT_TYPE_WAVEFRONT,
302 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
303 q->pipe, q->queue);
304
305 if (retval != 0)
306 goto out;
307
308 deallocate_hqd(dqm, q);
309
310 mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
311
312 list_del(&q->list);
313 if (list_empty(&qpd->queues_list))
314 deallocate_vmid(dqm, qpd, q);
315 dqm->queue_count--;
316out:
317 mutex_unlock(&dqm->lock);
318 return retval;
319}
320
321static int update_queue(struct device_queue_manager *dqm, struct queue *q)
322{
323 int retval;
324 struct mqd_manager *mqd;
325
326 BUG_ON(!dqm || !q || !q->mqd);
327
328 mutex_lock(&dqm->lock);
329 mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
330 if (mqd == NULL) {
331 mutex_unlock(&dqm->lock);
332 return -ENOMEM;
333 }
334
335 retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
336 if (q->properties.is_active == true)
337 dqm->queue_count++;
338 else
339 dqm->queue_count--;
340
341 if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
342 retval = execute_queues_cpsch(dqm, false);
343
344 mutex_unlock(&dqm->lock);
345 return retval;
346}
347
348static struct mqd_manager *get_mqd_manager_nocpsch(
349 struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
350{
351 struct mqd_manager *mqd;
352
353 BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);
354
355 pr_debug("kfd: In func %s mqd type %d\n", __func__, type);
356
357 mqd = dqm->mqds[type];
358 if (!mqd) {
359 mqd = mqd_manager_init(type, dqm->dev);
360 if (mqd == NULL)
 361			pr_err("kfd: mqd manager is NULL\n");
362 dqm->mqds[type] = mqd;
363 }
364
365 return mqd;
366}
367
368static int register_process_nocpsch(struct device_queue_manager *dqm,
369 struct qcm_process_device *qpd)
370{
371 struct device_process_node *n;
372
373 BUG_ON(!dqm || !qpd);
374
375 pr_debug("kfd: In func %s\n", __func__);
376
377 n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
378 if (!n)
379 return -ENOMEM;
380
381 n->qpd = qpd;
382
383 mutex_lock(&dqm->lock);
384 list_add(&n->list, &dqm->queues);
385
386 init_process_memory(dqm, qpd);
387 dqm->processes_count++;
388
389 mutex_unlock(&dqm->lock);
390
391 return 0;
392}
393
394static int unregister_process_nocpsch(struct device_queue_manager *dqm,
395 struct qcm_process_device *qpd)
396{
397 int retval;
398 struct device_process_node *cur, *next;
399
400 BUG_ON(!dqm || !qpd);
401
402 BUG_ON(!list_empty(&qpd->queues_list));
403
404 pr_debug("kfd: In func %s\n", __func__);
405
406 retval = 0;
407 mutex_lock(&dqm->lock);
408
409 list_for_each_entry_safe(cur, next, &dqm->queues, list) {
410 if (qpd == cur->qpd) {
411 list_del(&cur->list);
 412			kfree(cur);
413 dqm->processes_count--;
414 goto out;
415 }
416 }
417 /* qpd not found in dqm list */
418 retval = 1;
419out:
420 mutex_unlock(&dqm->lock);
421 return retval;
422}
423
424static int
425set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
426 unsigned int vmid)
427{
428 uint32_t pasid_mapping;
429
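	/* a pasid of 0 writes an empty mapping; any other pasid is marked valid */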
430 pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
431 ATC_VMID_PASID_MAPPING_VALID;
432 return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping,
433 vmid);
434}
435
436static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
437{
438 /* In 64-bit mode, we can only control the top 3 bits of the LDS,
439 * scratch and GPUVM apertures.
440 * The hardware fills in the remaining 59 bits according to the
441 * following pattern:
442 * LDS: X0000000'00000000 - X0000001'00000000 (4GB)
443 * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
444 * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
445 *
446 * (where X/Y is the configurable nybble with the low-bit 0)
447 *
448 * LDS and scratch will have the same top nybble programmed in the
449 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
450 * GPUVM can have a different top nybble programmed in the
451 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
452 * We don't bother to support different top nybbles
453 * for LDS/Scratch and GPUVM.
454 */
455
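	/*
	 * Example: a top nybble of 0x8 yields
	 * PRIVATE_BASE(0x8000) | SHARED_BASE(0x8000), i.e. apertures whose
	 * top nybble (the X/Y above) is 0x8.
	 */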
456 BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
457 top_address_nybble == 0);
458
459 return PRIVATE_BASE(top_address_nybble << 12) |
460 SHARED_BASE(top_address_nybble << 12);
461}
462
463static int init_memory(struct device_queue_manager *dqm)
464{
465 int i, retval;
466
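	/* clear any stale PASID mappings on the KFD-owned VMIDs (8-15) */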
467 for (i = 8; i < 16; i++)
468 set_pasid_vmid_mapping(dqm, 0, i);
469
470 retval = kfd2kgd->init_memory(dqm->dev->kgd);
471 if (retval == 0)
472 is_mem_initialized = true;
473 return retval;
474}
475
476
477static int init_pipelines(struct device_queue_manager *dqm,
478 unsigned int pipes_num, unsigned int first_pipe)
479{
480 void *hpdptr;
481 struct mqd_manager *mqd;
482 unsigned int i, err, inx;
483 uint64_t pipe_hpd_addr;
484
485 BUG_ON(!dqm || !dqm->dev);
486
487 pr_debug("kfd: In func %s\n", __func__);
488
489 /*
490 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
491 * The driver never accesses this memory after zeroing it.
492 * It doesn't even have to be saved/restored on suspend/resume
493 * because it contains no data when there are no active queues.
494 */
495
496 err = kfd2kgd->allocate_mem(dqm->dev->kgd,
497 CIK_HPD_EOP_BYTES * pipes_num,
498 PAGE_SIZE,
499 KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
500 (struct kgd_mem **) &dqm->pipeline_mem);
501
502 if (err) {
 503		pr_err("kfd: error allocating vidmem for %d pipes\n",
504 pipes_num);
505 return -ENOMEM;
506 }
507
508 hpdptr = dqm->pipeline_mem->cpu_ptr;
509 dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;
510
511 memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);
512
513 mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
514 if (mqd == NULL) {
515 kfd2kgd->free_mem(dqm->dev->kgd,
516 (struct kgd_mem *) dqm->pipeline_mem);
517 return -ENOMEM;
518 }
519
520 for (i = 0; i < pipes_num; i++) {
521 inx = i + first_pipe;
522 pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
523 pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
524 /* = log2(bytes/4)-1 */
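		/*
		 * With CIK_HPD_EOP_BYTES = 2048: log2(2048/4) - 1 = 9 - 1 = 8,
		 * i.e. CIK_HPD_EOP_BYTES_LOG2 - 3.
		 */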
525 kfd2kgd->init_pipeline(dqm->dev->kgd, i,
526 CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
527 }
528
529 return 0;
530}
531
532
533static int init_scheduler(struct device_queue_manager *dqm)
534{
535 int retval;
536
537 BUG_ON(!dqm);
538
539 pr_debug("kfd: In %s\n", __func__);
540
541 retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE);
542 if (retval != 0)
543 return retval;
544
545 retval = init_memory(dqm);
546
547 return retval;
548}
549
550static int initialize_nocpsch(struct device_queue_manager *dqm)
551{
552 int i;
553
554 BUG_ON(!dqm);
555
556 pr_debug("kfd: In func %s num of pipes: %d\n",
557 __func__, get_pipes_num(dqm));
558
559 mutex_init(&dqm->lock);
560 INIT_LIST_HEAD(&dqm->queues);
561 dqm->queue_count = dqm->next_pipe_to_allocate = 0;
562 dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
563 sizeof(unsigned int), GFP_KERNEL);
564 if (!dqm->allocated_queues) {
565 mutex_destroy(&dqm->lock);
566 return -ENOMEM;
567 }
568
569 for (i = 0; i < get_pipes_num(dqm); i++)
570 dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
571
572 dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
573
574 init_scheduler(dqm);
575 return 0;
576}
577
578static void uninitialize_nocpsch(struct device_queue_manager *dqm)
579{
580 BUG_ON(!dqm);
581
582 BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
583
584 kfree(dqm->allocated_queues);
585 mutex_destroy(&dqm->lock);
586 kfd2kgd->free_mem(dqm->dev->kgd,
587 (struct kgd_mem *) dqm->pipeline_mem);
588}
589
590static int start_nocpsch(struct device_queue_manager *dqm)
591{
592 return 0;
593}
594
595static int stop_nocpsch(struct device_queue_manager *dqm)
596{
597 return 0;
598}
599
600/*
601 * Device Queue Manager implementation for cp scheduler
602 */
603
604static int set_sched_resources(struct device_queue_manager *dqm)
605{
606 struct scheduling_resources res;
607 unsigned int queue_num, queue_mask;
608
609 BUG_ON(!dqm);
610
611 pr_debug("kfd: In func %s\n", __func__);
612
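	/*
	 * One bit per HQD slot on the pipes handed to the HWS, shifted past
	 * the pipes that precede first_compute_pipe.
	 */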
613 queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
614 queue_mask = (1 << queue_num) - 1;
615 res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
616 res.vmid_mask <<= KFD_VMID_START_OFFSET;
617 res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
618 res.gws_mask = res.oac_mask = res.gds_heap_base =
619 res.gds_heap_size = 0;
620
621 pr_debug("kfd: scheduling resources:\n"
622 " vmid mask: 0x%8X\n"
623 " queue mask: 0x%8llX\n",
624 res.vmid_mask, res.queue_mask);
625
626 return pm_send_set_resources(&dqm->packets, &res);
627}
628
629static int initialize_cpsch(struct device_queue_manager *dqm)
630{
631 int retval;
632
633 BUG_ON(!dqm);
634
635 pr_debug("kfd: In func %s num of pipes: %d\n",
636 __func__, get_pipes_num_cpsch());
637
638 mutex_init(&dqm->lock);
639 INIT_LIST_HEAD(&dqm->queues);
640 dqm->queue_count = dqm->processes_count = 0;
641 dqm->active_runlist = false;
642 retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
643 if (retval != 0)
644 goto fail_init_pipelines;
645
646 return 0;
647
648fail_init_pipelines:
649 mutex_destroy(&dqm->lock);
650 return retval;
651}
652
653static int start_cpsch(struct device_queue_manager *dqm)
654{
655 struct device_process_node *node;
656 int retval;
657
658 BUG_ON(!dqm);
659
660 retval = 0;
661
662 retval = pm_init(&dqm->packets, dqm);
663 if (retval != 0)
664 goto fail_packet_manager_init;
665
666 retval = set_sched_resources(dqm);
667 if (retval != 0)
668 goto fail_set_sched_resources;
669
670 pr_debug("kfd: allocating fence memory\n");
671
672 /* allocate fence memory on the gart */
673 retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
674 sizeof(*dqm->fence_addr),
675 32,
676 KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
677 (struct kgd_mem **) &dqm->fence_mem);
678
679 if (retval != 0)
680 goto fail_allocate_vidmem;
681
682 dqm->fence_addr = dqm->fence_mem->cpu_ptr;
683 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
684
685 list_for_each_entry(node, &dqm->queues, list)
686 if (node->qpd->pqm->process && dqm->dev)
687 kfd_bind_process_to_device(dqm->dev,
688 node->qpd->pqm->process);
689
690 execute_queues_cpsch(dqm, true);
691
692 return 0;
693fail_allocate_vidmem:
694fail_set_sched_resources:
695 pm_uninit(&dqm->packets);
696fail_packet_manager_init:
697 return retval;
698}
699
700static int stop_cpsch(struct device_queue_manager *dqm)
701{
702 struct device_process_node *node;
703 struct kfd_process_device *pdd;
704
705 BUG_ON(!dqm);
706
707 destroy_queues_cpsch(dqm, true);
708
709 list_for_each_entry(node, &dqm->queues, list) {
710 pdd = kfd_get_process_device_data(dqm->dev,
711 node->qpd->pqm->process, 1);
712 pdd->bound = false;
713 }
714 kfd2kgd->free_mem(dqm->dev->kgd,
715 (struct kgd_mem *) dqm->fence_mem);
716 pm_uninit(&dqm->packets);
717
718 return 0;
719}
720
721static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
722 struct kernel_queue *kq,
723 struct qcm_process_device *qpd)
724{
725 BUG_ON(!dqm || !kq || !qpd);
726
727 pr_debug("kfd: In func %s\n", __func__);
728
729 mutex_lock(&dqm->lock);
730 list_add(&kq->list, &qpd->priv_queue_list);
731 dqm->queue_count++;
732 qpd->is_debug = true;
733 execute_queues_cpsch(dqm, false);
734 mutex_unlock(&dqm->lock);
735
736 return 0;
737}
738
739static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
740 struct kernel_queue *kq,
741 struct qcm_process_device *qpd)
742{
743 BUG_ON(!dqm || !kq);
744
745 pr_debug("kfd: In %s\n", __func__);
746
747 mutex_lock(&dqm->lock);
748 destroy_queues_cpsch(dqm, false);
749 list_del(&kq->list);
750 dqm->queue_count--;
751 qpd->is_debug = false;
752 execute_queues_cpsch(dqm, false);
753 mutex_unlock(&dqm->lock);
754}
755
756static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
757 struct qcm_process_device *qpd, int *allocate_vmid)
758{
759 int retval;
760 struct mqd_manager *mqd;
761
762 BUG_ON(!dqm || !q || !qpd);
763
764 retval = 0;
765
766 if (allocate_vmid)
767 *allocate_vmid = 0;
768
769 mutex_lock(&dqm->lock);
770
771 mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
772 if (mqd == NULL) {
773 mutex_unlock(&dqm->lock);
774 return -ENOMEM;
775 }
776
777 retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
778 &q->gart_mqd_addr, &q->properties);
779 if (retval != 0)
780 goto out;
781
782 list_add(&q->list, &qpd->queues_list);
783 if (q->properties.is_active) {
784 dqm->queue_count++;
785 retval = execute_queues_cpsch(dqm, false);
786 }
787
788out:
789 mutex_unlock(&dqm->lock);
790 return retval;
791}
792
793static int fence_wait_timeout(unsigned int *fence_addr,
794 unsigned int fence_value,
795 unsigned long timeout)
796{
797 BUG_ON(!fence_addr);
798 timeout += jiffies;
799
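	/*
	 * Busy-wait until the CP writes fence_value; cpu_relax() keeps the
	 * poll loop friendly to a sibling hardware thread.
	 */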
800 while (*fence_addr != fence_value) {
801 if (time_after(jiffies, timeout)) {
802 pr_err("kfd: qcm fence wait loop timeout expired\n");
803 return -ETIME;
804 }
805 cpu_relax();
806 }
807
808 return 0;
809}
810
811static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
812{
813 int retval;
814
815 BUG_ON(!dqm);
816
817 retval = 0;
818
819 if (lock)
820 mutex_lock(&dqm->lock);
821 if (dqm->active_runlist == false)
822 goto out;
823 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
824 KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
825 if (retval != 0)
826 goto out;
827
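	/*
	 * Arm the fence, then ask the CP (via pm_send_query_status) to write
	 * KFD_FENCE_COMPLETED once the unmap is processed, and poll for it.
	 */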
828 *dqm->fence_addr = KFD_FENCE_INIT;
829 pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
830 KFD_FENCE_COMPLETED);
 831	/* blocks until the CP writes the fence value or the timeout expires */
832 fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
833 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
834 pm_release_ib(&dqm->packets);
835 dqm->active_runlist = false;
836
837out:
838 if (lock)
839 mutex_unlock(&dqm->lock);
840 return retval;
841}
842
843static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
844{
845 int retval;
846
847 BUG_ON(!dqm);
848
849 if (lock)
850 mutex_lock(&dqm->lock);
851
852 retval = destroy_queues_cpsch(dqm, false);
853 if (retval != 0) {
 854		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queue preemption\n");
855 goto out;
856 }
857
858 if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
859 retval = 0;
860 goto out;
861 }
862
863 if (dqm->active_runlist) {
864 retval = 0;
865 goto out;
866 }
867
868 retval = pm_send_runlist(&dqm->packets, &dqm->queues);
869 if (retval != 0) {
 870		pr_err("kfd: failed to execute runlist\n");
871 goto out;
872 }
873 dqm->active_runlist = true;
874
875out:
876 if (lock)
877 mutex_unlock(&dqm->lock);
878 return retval;
879}
880
881static int destroy_queue_cpsch(struct device_queue_manager *dqm,
882 struct qcm_process_device *qpd,
883 struct queue *q)
884{
885 int retval;
886 struct mqd_manager *mqd;
887
888 BUG_ON(!dqm || !qpd || !q);
889
890 retval = 0;
891
892 /* remove queue from list to prevent rescheduling after preemption */
893 mutex_lock(&dqm->lock);
894
895 mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
896 if (!mqd) {
897 retval = -ENOMEM;
898 goto failed;
899 }
900
901 list_del(&q->list);
902 dqm->queue_count--;
903
904 execute_queues_cpsch(dqm, false);
905
906 mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
907
908 mutex_unlock(&dqm->lock);
909
910 return 0;
911
912failed:
913 mutex_unlock(&dqm->lock);
914 return retval;
915}
916
917/*
918 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
919 * stay in user mode.
920 */
921#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
922/* APE1 limit is inclusive and 64K aligned. */
923#define APE1_LIMIT_ALIGNMENT 0xFFFF
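/*
 * Example: base = 0x0000123456780000 with a 64 KB aperture gives
 * limit = 0x000012345678FFFF; then base & APE1_FIXED_BITS_MASK == 0 and
 * limit & APE1_FIXED_BITS_MASK == APE1_LIMIT_ALIGNMENT, so both checks
 * in set_cache_memory_policy() pass.
 */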
924
925static bool set_cache_memory_policy(struct device_queue_manager *dqm,
926 struct qcm_process_device *qpd,
927 enum cache_policy default_policy,
928 enum cache_policy alternate_policy,
929 void __user *alternate_aperture_base,
930 uint64_t alternate_aperture_size)
931{
932 uint32_t default_mtype;
933 uint32_t ape1_mtype;
934
935 pr_debug("kfd: In func %s\n", __func__);
936
937 mutex_lock(&dqm->lock);
938
939 if (alternate_aperture_size == 0) {
940 /* base > limit disables APE1 */
941 qpd->sh_mem_ape1_base = 1;
942 qpd->sh_mem_ape1_limit = 0;
943 } else {
944 /*
945 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
946 * SH_MEM_APE1_BASE[31:0], 0x0000 }
947 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
948 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
949 * Verify that the base and size parameters can be
950 * represented in this format and convert them.
951 * Additionally restrict APE1 to user-mode addresses.
952 */
953
954 uint64_t base = (uintptr_t)alternate_aperture_base;
955 uint64_t limit = base + alternate_aperture_size - 1;
956
957 if (limit <= base)
958 goto out;
959
960 if ((base & APE1_FIXED_BITS_MASK) != 0)
961 goto out;
962
963 if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
964 goto out;
965
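		/* keep bits 47:16; the hardware re-expands base/limit as above */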
966 qpd->sh_mem_ape1_base = base >> 16;
967 qpd->sh_mem_ape1_limit = limit >> 16;
968 }
969
970 default_mtype = (default_policy == cache_policy_coherent) ?
971 MTYPE_NONCACHED :
972 MTYPE_CACHED;
973
974 ape1_mtype = (alternate_policy == cache_policy_coherent) ?
975 MTYPE_NONCACHED :
976 MTYPE_CACHED;
977
978 qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
979 | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
980 | DEFAULT_MTYPE(default_mtype)
981 | APE1_MTYPE(ape1_mtype);
982
983 if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
984 program_sh_mem_settings(dqm, qpd);
985
986 pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
987 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
988 qpd->sh_mem_ape1_limit);
989
990 mutex_unlock(&dqm->lock);
991 return true;
992
993out:
994 mutex_unlock(&dqm->lock);
995 return false;
996}
997
998struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
999{
1000 struct device_queue_manager *dqm;
1001
1002 BUG_ON(!dev);
1003
1004 dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
1005 if (!dqm)
1006 return NULL;
1007
1008 dqm->dev = dev;
1009 switch (sched_policy) {
1010 case KFD_SCHED_POLICY_HWS:
1011 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1012 /* initialize dqm for cp scheduling */
1013 dqm->create_queue = create_queue_cpsch;
1014 dqm->initialize = initialize_cpsch;
1015 dqm->start = start_cpsch;
1016 dqm->stop = stop_cpsch;
1017 dqm->destroy_queue = destroy_queue_cpsch;
1018 dqm->update_queue = update_queue;
1019 dqm->get_mqd_manager = get_mqd_manager_nocpsch;
1020 dqm->register_process = register_process_nocpsch;
1021 dqm->unregister_process = unregister_process_nocpsch;
1022 dqm->uninitialize = uninitialize_nocpsch;
1023 dqm->create_kernel_queue = create_kernel_queue_cpsch;
1024 dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch;
1025 dqm->set_cache_memory_policy = set_cache_memory_policy;
1026 break;
1027 case KFD_SCHED_POLICY_NO_HWS:
1028 /* initialize dqm for no cp scheduling */
1029 dqm->start = start_nocpsch;
1030 dqm->stop = stop_nocpsch;
1031 dqm->create_queue = create_queue_nocpsch;
1032 dqm->destroy_queue = destroy_queue_nocpsch;
1033 dqm->update_queue = update_queue;
1034 dqm->get_mqd_manager = get_mqd_manager_nocpsch;
1035 dqm->register_process = register_process_nocpsch;
1036 dqm->unregister_process = unregister_process_nocpsch;
1037 dqm->initialize = initialize_nocpsch;
1038 dqm->uninitialize = uninitialize_nocpsch;
1039 dqm->set_cache_memory_policy = set_cache_memory_policy;
1040 break;
1041 default:
1042 BUG();
1043 break;
1044 }
1045
1046 if (dqm->initialize(dqm) != 0) {
1047 kfree(dqm);
1048 return NULL;
1049 }
1050
1051 return dqm;
1052}
1053
1054void device_queue_manager_uninit(struct device_queue_manager *dqm)
1055{
1056 BUG_ON(!dqm);
1057
1058 dqm->uninitialize(dqm);
1059 kfree(dqm);
1060}
1061