drm/amdkfd: move locking outside of unmap_queues_cpsch
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				bool static_queues_included);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

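/*
 * Grab a VMID from the range reserved for KFD and bind it to the process:
 * the PASID<->VMID mapping and the SH_MEM settings are programmed into the
 * hardware right away. Used only in no-HWS mode, when the process creates
 * its first queue on this device.
 */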
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap,
				dqm->dev->vm_info.vmid_num_kfd);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

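/*
 * Create a user-mode queue when queues are managed directly by the driver
 * (no HW scheduler): allocate a VMID on the process's first queue, then
 * hand the queue to the compute or SDMA path and update the counters.
 */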
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

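/*
 * Find a free HQD slot for a compute queue. Pipes are scanned round-robin
 * starting at next_pipe_to_allocate so queues are spread across all
 * enabled pipes ("horizontal" allocation).
 */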
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				get_queues_per_pipe(dqm));

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_hqd;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
			       q->process->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

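/*
 * Tear down a queue in no-HWS mode: release its HQD or SDMA slot, preempt
 * it on the hardware, free the MQD, and return the VMID once the process
 * has no queues left on this device.
 */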
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);

	if (retval)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

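/*
 * Push updated queue properties into the MQD and adjust the active queue
 * count. Under the HW scheduler the runlist is re-executed so the change
 * takes effect on the CP.
 */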
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto out_unlock;
	}

	if (q->properties.is_active)
		prev_active = true;

	/*
	 * check active state vs. the previous state
	 * and modify counter accordingly
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active) && (!prev_active))
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (!mqd)
			pr_err("mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->ops_asic_specific.register_process(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

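/*
 * Set up the no-HWS bookkeeping: a per-pipe allocation bitmap seeded from
 * the queues reserved for KFD in shared_resources, plus the VMID and SDMA
 * bitmaps.
 */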
static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

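/*
 * SDMA queues are tracked in a single bitmap; the allocated id is later
 * split into an engine id and a per-engine queue id when the queue is
 * created.
 */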
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

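/*
 * Tell the HW scheduler which VMIDs and queues it may use. Only queues on
 * the first MEC are handed over, and the 64-bit queue_mask bounds how many
 * queues can currently be described.
 */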
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
		res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval)
		mutex_destroy(&dqm->lock);

	return retval;
}

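/*
 * Bring up the HW-scheduler path: initialize the packet manager, hand the
 * scheduling resources to the CP and allocate the GART fence that
 * unmap_queues_cpsch() polls to detect preemption completion.
 */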
static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	mutex_lock(&dqm->lock);
	execute_queues_cpsch(dqm);
	mutex_unlock(&dqm->lock);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	mutex_lock(&dqm->lock);
	unmap_queues_cpsch(dqm, true);
	mutex_unlock(&dqm->lock);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	/* here we actually preempt the DIQ */
	unmap_queues_cpsch(dqm, true);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

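/*
 * Create a user-mode queue under the HW scheduler: initialize its MQD and,
 * if the queue is active, resubmit the runlist so the CP picks it up.
 */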
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd) {
		retval = -ENOMEM;
		goto out;
	}

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

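/*
 * Poll (yielding via schedule()) until the scheduler writes fence_value to
 * fence_addr, or give up with -ETIME after timeout_ms.
 */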
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				bool static_queues_included)
{
	int retval;
	enum kfd_unmap_queues_filter filter;
	struct kfd_process_device *pdd;

	retval = 0;

	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	filter = static_queues_included ?
			KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, 0, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
		return retval;
	}
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = unmap_queues_cpsch(dqm, false);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
		return retval;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

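/*
 * Build the device queue manager for a device: wire up either the
 * HW-scheduler (cpsch) or the direct (nocpsch) ops table according to the
 * global sched_policy setting, then hook in the ASIC-specific callbacks.
 */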
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}