/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  u32 pasid, unsigned int vmid);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q);

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       struct queue *q);
static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

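/* Return true if any queue on this (mec, pipe) is usable by KFD according to
 * cp_queue_bitmap.
 */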
static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.cp_queue_bitmap))
			return true;
	return false;
}

unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_xgmi_sdma_engines;
}

static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
	return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
			* dqm->dev->device_info->num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_xgmi_sdma_engines
			* dqm->dev->device_info->num_sdma_queues_per_engine;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static void increment_queue_count(struct device_queue_manager *dqm,
			enum kfd_queue_type type)
{
	dqm->active_queue_count++;
	if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count++;
}

static void decrement_queue_count(struct device_queue_manager *dqm,
			enum kfd_queue_type type)
{
	dqm->active_queue_count--;
	if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count--;
}

static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */
		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for an SDMA engine is 512.
		 */
		uint32_t *idx_offset =
				dev->shared_resources.sdma_doorbell_idx;

		q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
			+ (q->properties.sdma_queue_id & 1)
			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
			+ (q->properties.sdma_queue_id >> 1);
	} else {
		/* For CP queues on SOC15 reserve a free doorbell ID */
		unsigned int found;

		found = find_first_zero_bit(qpd->doorbell_bitmap,
					    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
			pr_debug("No doorbells available");
			return -EBUSY;
		}
		set_bit(found, qpd->doorbell_bitmap);
		q->doorbell_id = found;
	}

	q->properties.doorbell_off =
		kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
						  q->doorbell_id);
	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

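/* Pick a free VMID from the KFD VMID range, bind it to the process PASID and
 * program the memory aperture settings and page table base for that VMID.
 */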
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int allocated_vmid = -1, i;

	for (i = dqm->dev->vm_info.first_vmid_kfd;
			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
		if (!dqm->vmid_pasid[i]) {
			allocated_vmid = i;
			break;
		}
	}

	if (allocated_vmid < 0) {
		pr_err("no more vmid to allocate\n");
		return -ENOSPC;
	}

	pr_debug("vmid allocated: %d\n", allocated_vmid);

	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);

	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
				qpd->sh_hidden_private_base, qpd->vmid);

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
	dqm->vmid_pasid[qpd->vmid] = 0;

	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		retval = allocate_hqd(dqm, q);
		if (retval)
			goto deallocate_vmid;
		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		retval = allocate_sdma_queue(dqm, q);
		if (retval)
			goto deallocate_vmid;
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	}

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_hqd;

	/* Temporarily release dqm lock to avoid a circular lock dependency */
	dqm_unlock(dqm);
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	dqm_lock(dqm);

	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}
	mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (q->properties.is_active) {
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
			goto add_queue_to_list;
		}

		if (WARN(q->process->mm != current->mm,
					"should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					q->queue, &q->properties, current->mm);
		if (retval)
			goto out_free_mqd;
	}

add_queue_to_list:
	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		increment_queue_count(dqm, q->properties.type);

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	goto out_unlock;

out_free_mqd:
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
deallocate_vmid:
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
out_unlock:
	dqm_unlock(dqm);
	return retval;
}

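/* Find a free HQD slot, starting the search at next_pipe_to_allocate so that
 * queues are spread round-robin across the enabled pipes.
 */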
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active) {
		decrement_queue_count(dqm, q->properties.type);
		if (q->properties.is_gws) {
			dqm->gws_queue_count--;
			qpd->mapped_gws_queue = false;
		}
	}

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd,
			    struct queue *q)
{
	int retval;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	if (!retval)
		pdd->sdma_past_activity_counter += sdma_val;
	dqm_unlock(dqm);

	return retval;
}

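/* Update an existing queue's MQD from q->properties. The queue is unmapped
 * (HWS) or its MQD destroyed (non-HWS) first, then re-mapped or re-loaded if
 * it is still active, keeping the active and GWS queue counters consistent.
 */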
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

		if (!dqm->sched_running) {
			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
			goto out_unlock;
		}

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		increment_queue_count(dqm, q->properties.type);
	else if (!q->properties.is_active && prev_active)
		decrement_queue_count(dqm, q->properties.type);

	if (q->gws && !q->properties.is_gws) {
		if (q->properties.is_active) {
			dqm->gws_queue_count++;
			pdd->qpd.mapped_gws_queue = true;
		}
		q->properties.is_gws = true;
	} else if (!q->gws && q->properties.is_gws) {
		if (q->properties.is_active) {
			dqm->gws_queue_count--;
			pdd->qpd.mapped_gws_queue = false;
		}
		q->properties.is_gws = false;
	}

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

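/* Evict all queues of a process in non-HWS mode: mark every queue evicted and
 * destroy the MQDs of those that were active. The qpd->evicted refcount allows
 * nested eviction requests.
 */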
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval, ret = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
			    pdd->process->pasid);

	pdd->last_evict_timestamp = get_jiffies_64();
	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = true;
		if (!q->properties.is_active)
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = false;
		decrement_queue_count(dqm, q->properties.type);
		if (q->properties.is_gws) {
			dqm->gws_queue_count--;
			qpd->mapped_gws_queue = false;
		}

		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
			continue;

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}

out:
	dqm_unlock(dqm);
	return ret;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = true;
		if (!q->properties.is_active)
			continue;

		q->properties.is_active = false;
		decrement_queue_count(dqm, q->properties.type);
	}
	pdd->last_evict_timestamp = get_jiffies_64();
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	dqm_unlock(dqm);
	return retval;
}

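/* Restore a previously evicted process in non-HWS mode: re-program the page
 * table base, clear the eviction flags and reload the MQDs of queues that are
 * otherwise active again.
 */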
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval, ret = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		ret = -EFAULT;
		goto out;
	}

	/* Remove the eviction flags. Activate queues that are not
	 * inactive for other reasons.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = true;
		increment_queue_count(dqm, q->properties.type);
		if (q->properties.is_gws) {
			dqm->gws_queue_count++;
			qpd->mapped_gws_queue = true;
		}

		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
			continue;

		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				       q->queue, &q->properties, mm);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}
	qpd->evicted = 0;
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return ret;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		q->properties.is_active = true;
		increment_queue_count(dqm, q->properties.type);
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	qpd->evicted = 0;
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	dqm_unlock(dqm);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	kfd_inc_compute_active(dqm->dev);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (!retval)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
			unsigned int vmid)
{
	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid, vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

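/* Set up the non-HWS (software scheduler) DQM state: per-pipe HQD bitmaps,
 * the VMID-to-PASID table and the SDMA/XGMI-SDMA queue bitmaps.
 */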
static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.cp_queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));

	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	pr_info("SW scheduler is used");
	init_interrupts(dqm);

	if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
		return pm_init(&dqm->packets, dqm);
	dqm->sched_running = true;

	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
		pm_uninit(&dqm->packets, false);
	dqm->sched_running = false;

	return 0;
}

static void pre_reset(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	dqm->is_resetting = true;
	dqm_unlock(dqm);
}

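/* Allocate an SDMA queue from the PCIe-optimized or XGMI-optimized bitmap and
 * derive the engine/queue id pair from the allocated bit.
 */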
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q)
{
	int bit;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (dqm->sdma_bitmap == 0) {
			pr_err("No more SDMA queue to allocate\n");
			return -ENOMEM;
		}

		bit = __ffs64(dqm->sdma_bitmap);
		dqm->sdma_bitmap &= ~(1ULL << bit);
		q->sdma_id = bit;
		q->properties.sdma_engine_id = q->sdma_id %
				get_num_sdma_engines(dqm);
		q->properties.sdma_queue_id = q->sdma_id /
				get_num_sdma_engines(dqm);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (dqm->xgmi_sdma_bitmap == 0) {
			pr_err("No more XGMI SDMA queue to allocate\n");
			return -ENOMEM;
		}
		bit = __ffs64(dqm->xgmi_sdma_bitmap);
		dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
		q->sdma_id = bit;
		/* sdma_engine_id is sdma id including
		 * both PCIe-optimized SDMAs and XGMI-
		 * optimized SDMAs. The calculation below
		 * assumes the first N engines are always
		 * PCIe-optimized ones
		 */
		q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
				q->sdma_id % get_num_xgmi_sdma_engines(dqm);
		q->properties.sdma_queue_id = q->sdma_id /
				get_num_xgmi_sdma_engines(dqm);
	}

	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q)
{
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (q->sdma_id >= get_num_sdma_queues(dqm))
			return;
		dqm->sdma_bitmap |= (1ULL << q->sdma_id);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
			return;
		dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
	}
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

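/* Tell the HWS firmware which VMIDs and which CP queues (first MEC only) it
 * may use, via a SET_RESOURCES packet.
 */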
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= 1ull
			<< amdgpu_queue_mask_bit_to_set_resource_bit(
				(struct amdgpu_device *)dqm->dev->kgd, i);
	}
	res.gws_mask = ~0ull;
	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
		"vmid mask: 0x%8X\n"
		"queue mask: 0x%8llX\n",
		res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	uint64_t num_sdma_queues;
	uint64_t num_xgmi_sdma_queues;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->processes_count = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;
	dqm->active_runlist = false;

	num_sdma_queues = get_num_sdma_queues(dqm);
	if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
		dqm->sdma_bitmap = ULLONG_MAX;
	else
		dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);

	num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
	if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
		dqm->xgmi_sdma_bitmap = ULLONG_MAX;
	else
		dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);

	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	return 0;
}

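/* Bring up the HWS path: initialize the packet manager, hand the scheduling
 * resources to the firmware scheduler, allocate the preemption fence on the
 * GART and map the currently known queues.
 */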
static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	dqm_lock(dqm);
	/* clear hang status when driver try to start the hw scheduler */
	dqm->is_hws_hang = false;
	dqm->is_resetting = false;
	dqm->sched_running = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets, false);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	bool hanging;

	dqm_lock(dqm);
	if (!dqm->is_hws_hang)
		unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	hanging = dqm->is_hws_hang || dqm->is_resetting;
	dqm->sched_running = false;
	dqm_unlock(dqm);

	pm_release_ib(&dqm->packets);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets, hanging);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	increment_queue_count(dqm, kq->queue->properties.type);
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	decrement_queue_count(dqm, kq->queue->properties.type);
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	dqm_unlock(dqm);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		retval = allocate_sdma_queue(dqm, q);
		dqm_unlock(dqm);
		if (retval)
			goto out;
	}

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	dqm_lock(dqm);
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;
	mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;

	if (q->properties.is_active) {
		increment_queue_count(dqm, q->properties.type);

		execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		deallocate_sdma_queue(dqm, q);
		dqm_unlock(dqm);
	}
out:
	return retval;
}

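/* Busy-wait (while yielding the CPU) until the fence written by the CP reaches
 * fence_value, or return -ETIME once timeout_ms has elapsed.
 */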
int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
				uint64_t fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (!dqm->sched_running)
		return 0;
	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
		return 0;
	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	pr_debug("%s sent runlist\n", __func__);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;

	if (!dqm->sched_running)
		return 0;
	if (dqm->is_hws_hang)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				queue_preemption_timeout_ms);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		dqm->is_hws_hang = true;
		/* It's possible we're detecting a HWS hang in the
		 * middle of a GPU reset. No need to schedule another
		 * reset in this case.
		 */
		if (!dqm->is_resetting)
			schedule_work(&dqm->hw_exception_work);
		return retval;
	}

	/* In the current MEC firmware implementation, if a compute queue
	 * doesn't respond to the preemption request in time, HIQ will
	 * abandon the unmap request without returning any timeout error
	 * to the driver. Instead, MEC firmware will log the doorbell of the
	 * unresponsive compute queue to the HIQ.MQD.queue_doorbell_id fields.
	 * To make sure the queue unmap was successful, the driver needs to
	 * check those fields
	 */
	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
	if (mqd_mgr->read_doorbell_id(dqm->packets.priv_queue->queue->mqd)) {
		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
		while (halt_if_hws_hang)
			schedule();
		return -ETIME;
	}

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval)
		return retval;

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	deallocate_doorbell(qpd, q);

	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		deallocate_sdma_queue(dqm, q);
		pdd->sdma_past_activity_counter += sdma_val;
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		decrement_queue_count(dqm, q->properties.type);
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
		if (q->properties.is_gws) {
			dqm->gws_queue_count--;
			qpd->mapped_gws_queue = false;
		}
	}

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	/* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;

failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 * SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;
	bool found = false;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

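/* Copy the control stack of an inactive, CWSR-enabled compute queue to user
 * space and report how much of the control stack and save area is in use.
 */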
1670static int get_wave_state(struct device_queue_manager *dqm,
1671 struct queue *q,
1672 void __user *ctl_stack,
1673 u32 *ctl_stack_used_size,
1674 u32 *save_area_used_size)
1675{
4e6c6fc1 1676 struct mqd_manager *mqd_mgr;
5df099e8
JC
1677 int r;
1678
1679 dqm_lock(dqm);
1680
1681 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
1682 q->properties.is_active || !q->device->cwsr_enabled) {
1683 r = -EINVAL;
1684 goto dqm_unlock;
1685 }
1686
d7c0b047 1687 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
5df099e8 1688
4e6c6fc1 1689 if (!mqd_mgr->get_wave_state) {
5df099e8
JC
1690 r = -EINVAL;
1691 goto dqm_unlock;
1692 }
1693
4e6c6fc1
YZ
1694 r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
1695 ctl_stack_used_size, save_area_used_size);
5df099e8
JC
1696
1697dqm_unlock:
1698 dqm_unlock(dqm);
1699 return r;
1700}
9fd3f1bf
FK
1701
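/*
 * process_termination_cpsch - HWS counterpart of process_termination_nocpsch:
 * remove the process's kernel and user mode queues from the scheduler,
 * unregister the process, reset wavefronts if needed, and free the MQDs only
 * after dropping the DQM lock to avoid circular locking with reclaim.
 */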
1702static int process_termination_cpsch(struct device_queue_manager *dqm,
1703 struct qcm_process_device *qpd)
1704{
1705 int retval;
1706 struct queue *q, *next;
1707 struct kernel_queue *kq, *kq_next;
8d5f3552 1708 struct mqd_manager *mqd_mgr;
9fd3f1bf
FK
1709 struct device_process_node *cur, *next_dpn;
1710 enum kfd_unmap_queues_filter filter =
1711 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
32cce8bc 1712 bool found = false;
9fd3f1bf
FK
1713
1714 retval = 0;
1715
efeaed4d 1716 dqm_lock(dqm);
9fd3f1bf
FK
1717
1718 /* Clean all kernel queues */
1719 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1720 list_del(&kq->list);
b42902f4 1721 decrement_queue_count(dqm, kq->queue->properties.type);
9fd3f1bf
FK
1722 qpd->is_debug = false;
1723 dqm->total_queue_count--;
1724 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1725 }
1726
1727 /* Clear all user mode queues */
1728 list_for_each_entry(q, &qpd->queues_list, list) {
c7637c95 1729 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1b4670f6 1730 deallocate_sdma_queue(dqm, q);
c7637c95 1731 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1b4670f6 1732 deallocate_sdma_queue(dqm, q);
9fd3f1bf 1733
b8020b03 1734 if (q->properties.is_active) {
b42902f4 1735 decrement_queue_count(dqm, q->properties.type);
b8020b03
JG
1736 if (q->properties.is_gws) {
1737 dqm->gws_queue_count--;
1738 qpd->mapped_gws_queue = false;
1739 }
1740 }
9fd3f1bf
FK
1741
1742 dqm->total_queue_count--;
1743 }
1744
1745 /* Unregister process */
1746 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1747 if (qpd == cur->qpd) {
1748 list_del(&cur->list);
1749 kfree(cur);
1750 dqm->processes_count--;
32cce8bc 1751 found = true;
9fd3f1bf
FK
1752 break;
1753 }
1754 }
1755
1756 retval = execute_queues_cpsch(dqm, filter, 0);
73ea648d 1757 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
9fd3f1bf
FK
1758 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
1759 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1760 qpd->reset_wavefronts = false;
1761 }
1762
89cd9d23
PY
1763 dqm_unlock(dqm);
1764
32cce8bc
FK
 1765	/* Do this outside the DQM lock: while holding it we must not enter
 1766	 * memory reclaim or take locks that other tasks hold while reclaiming.
 1767	 */
1768 if (found)
1769 kfd_dec_compute_active(dqm->dev);
1770
89cd9d23 1771 /* Lastly, free mqd resources.
8636e53c 1772 * Do free_mqd() after dqm_unlock to avoid circular locking.
89cd9d23 1773 */
9fd3f1bf 1774 list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
fdfa090b
OZ
1775 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1776 q->properties.type)];
9fd3f1bf 1777 list_del(&q->list);
bc920fd4 1778 qpd->queue_count--;
8636e53c 1779 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
9fd3f1bf
FK
1780 }
1781
9fd3f1bf
FK
1782 return retval;
1783}
1784
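/*
 * init_mqd_managers - Create one MQD manager per KFD_MQD_TYPE_* using the
 * ASIC-specific mqd_manager_init hook; on failure, free those already
 * created and return -ENOMEM.
 */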
fdfa090b
OZ
1785static int init_mqd_managers(struct device_queue_manager *dqm)
1786{
1787 int i, j;
1788 struct mqd_manager *mqd_mgr;
1789
1790 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
1791 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
1792 if (!mqd_mgr) {
1793 pr_err("mqd manager [%d] initialization failed\n", i);
1794 goto out_free;
1795 }
1796 dqm->mqd_mgrs[i] = mqd_mgr;
1797 }
1798
1799 return 0;
1800
1801out_free:
1802 for (j = 0; j < i; j++) {
1803 kfree(dqm->mqd_mgrs[j]);
1804 dqm->mqd_mgrs[j] = NULL;
1805 }
1806
1807 return -ENOMEM;
1808}
11614c36
OZ
1809
 1810/* Allocate one hiq mqd (HWS) and all SDMA mqds in one contiguous chunk */
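/* The chunk is sized for the HIQ MQD plus one SDMA MQD per queue across all
 * SDMA engines, and is allocated as GTT memory so it is both CPU- and
 * GPU-visible.
 */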
1811static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
1812{
1813 int retval;
1814 struct kfd_dev *dev = dqm->dev;
1815 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
1816 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
c7637c95 1817 get_num_all_sdma_engines(dqm) *
11614c36
OZ
1818 dev->device_info->num_sdma_queues_per_engine +
1819 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
1820
1821 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
1822 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
f2cc50ce 1823 (void *)&(mem_obj->cpu_ptr), false);
11614c36
OZ
1824
1825 return retval;
1826}
1827
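/*
 * device_queue_manager_init - Allocate and set up the per-device DQM: choose
 * the scheduling policy (forcing NO_HWS on ASICs such as Hawaii that cannot
 * use HWS), fill in the policy-specific ops table and the ASIC-specific
 * asic_ops, create the MQD managers and the HIQ/SDMA MQD buffer, and finally
 * run ops.initialize. Returns NULL on any failure.
 */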
64c7f8cf
BG
1828struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1829{
1830 struct device_queue_manager *dqm;
1831
79775b62 1832 pr_debug("Loading device queue manager\n");
a22fc854 1833
dbf56ab1 1834 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
64c7f8cf
BG
1835 if (!dqm)
1836 return NULL;
1837
d146c5a7
FK
1838 switch (dev->device_info->asic_family) {
1839 /* HWS is not available on Hawaii. */
1840 case CHIP_HAWAII:
1841 /* HWS depends on CWSR for timely dequeue. CWSR is not
1842 * available on Tonga.
1843 *
1844 * FIXME: This argument also applies to Kaveri.
1845 */
1846 case CHIP_TONGA:
1847 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1848 break;
1849 default:
1850 dqm->sched_policy = sched_policy;
1851 break;
1852 }
1853
64c7f8cf 1854 dqm->dev = dev;
d146c5a7 1855 switch (dqm->sched_policy) {
64c7f8cf
BG
1856 case KFD_SCHED_POLICY_HWS:
1857 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1858 /* initialize dqm for cp scheduling */
45c9a5e4
OG
1859 dqm->ops.create_queue = create_queue_cpsch;
1860 dqm->ops.initialize = initialize_cpsch;
1861 dqm->ops.start = start_cpsch;
1862 dqm->ops.stop = stop_cpsch;
09c34e8d 1863 dqm->ops.pre_reset = pre_reset;
45c9a5e4
OG
1864 dqm->ops.destroy_queue = destroy_queue_cpsch;
1865 dqm->ops.update_queue = update_queue;
58dcd5bf
YZ
1866 dqm->ops.register_process = register_process;
1867 dqm->ops.unregister_process = unregister_process;
1868 dqm->ops.uninitialize = uninitialize;
45c9a5e4
OG
1869 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1870 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1871 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
9fd3f1bf 1872 dqm->ops.process_termination = process_termination_cpsch;
26103436
FK
1873 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1874 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
5df099e8 1875 dqm->ops.get_wave_state = get_wave_state;
64c7f8cf
BG
1876 break;
1877 case KFD_SCHED_POLICY_NO_HWS:
1878 /* initialize dqm for no cp scheduling */
45c9a5e4
OG
1879 dqm->ops.start = start_nocpsch;
1880 dqm->ops.stop = stop_nocpsch;
09c34e8d 1881 dqm->ops.pre_reset = pre_reset;
45c9a5e4
OG
1882 dqm->ops.create_queue = create_queue_nocpsch;
1883 dqm->ops.destroy_queue = destroy_queue_nocpsch;
1884 dqm->ops.update_queue = update_queue;
58dcd5bf
YZ
1885 dqm->ops.register_process = register_process;
1886 dqm->ops.unregister_process = unregister_process;
45c9a5e4 1887 dqm->ops.initialize = initialize_nocpsch;
58dcd5bf 1888 dqm->ops.uninitialize = uninitialize;
45c9a5e4 1889 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
9fd3f1bf 1890 dqm->ops.process_termination = process_termination_nocpsch;
26103436
FK
1891 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1892 dqm->ops.restore_process_queues =
1893 restore_process_queues_nocpsch;
5df099e8 1894 dqm->ops.get_wave_state = get_wave_state;
64c7f8cf
BG
1895 break;
1896 default:
d146c5a7 1897 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
32fa8219 1898 goto out_free;
64c7f8cf
BG
1899 }
1900
a22fc854
BG
1901 switch (dev->device_info->asic_family) {
1902 case CHIP_CARRIZO:
bfd5e378 1903 device_queue_manager_init_vi(&dqm->asic_ops);
300dec95
OG
1904 break;
1905
a22fc854 1906 case CHIP_KAVERI:
bfd5e378 1907 device_queue_manager_init_cik(&dqm->asic_ops);
300dec95 1908 break;
97672cbe
FK
1909
1910 case CHIP_HAWAII:
1911 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1912 break;
1913
1914 case CHIP_TONGA:
1915 case CHIP_FIJI:
1916 case CHIP_POLARIS10:
1917 case CHIP_POLARIS11:
846a44d7 1918 case CHIP_POLARIS12:
ed81cd6e 1919 case CHIP_VEGAM:
97672cbe
FK
1920 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1921 break;
bed4f110
FK
1922
1923 case CHIP_VEGA10:
846a44d7 1924 case CHIP_VEGA12:
22a3a294 1925 case CHIP_VEGA20:
bed4f110 1926 case CHIP_RAVEN:
5a959a89 1927 case CHIP_RENOIR:
49adcf8a 1928 case CHIP_ARCTURUS:
36e22d59 1929 case CHIP_ALDEBARAN:
bed4f110
FK
1930 device_queue_manager_init_v9(&dqm->asic_ops);
1931 break;
14328aa5 1932 case CHIP_NAVI10:
0e94b564 1933 case CHIP_NAVI12:
8099ae40 1934 case CHIP_NAVI14:
3a2f0c81 1935 case CHIP_SIENNA_CICHLID:
de89b2e4 1936 case CHIP_NAVY_FLOUNDER:
3a5e715d 1937 case CHIP_VANGOGH:
eb5a34d4 1938 case CHIP_DIMGREY_CAVEFISH:
14328aa5
PC
1939 device_queue_manager_init_v10_navi10(&dqm->asic_ops);
1940 break;
e596b903
YZ
1941 default:
1942 WARN(1, "Unexpected ASIC family %u",
1943 dev->device_info->asic_family);
1944 goto out_free;
a22fc854
BG
1945 }
1946
fdfa090b
OZ
1947 if (init_mqd_managers(dqm))
1948 goto out_free;
1949
11614c36
OZ
1950 if (allocate_hiq_sdma_mqd(dqm)) {
1951 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
1952 goto out_free;
1953 }
1954
32fa8219
FK
1955 if (!dqm->ops.initialize(dqm))
1956 return dqm;
64c7f8cf 1957
32fa8219
FK
1958out_free:
1959 kfree(dqm);
1960 return NULL;
64c7f8cf
BG
1961}
1962
7fd5a6fb
Y
1963static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
1964 struct kfd_mem_obj *mqd)
11614c36
OZ
1965{
1966 WARN(!mqd, "No hiq sdma mqd trunk to free");
1967
1968 amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
1969}
1970
64c7f8cf
BG
1971void device_queue_manager_uninit(struct device_queue_manager *dqm)
1972{
45c9a5e4 1973 dqm->ops.uninitialize(dqm);
11614c36 1974 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
64c7f8cf
BG
1975 kfree(dqm);
1976}
851a645e 1977
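/*
 * kfd_process_vm_fault - Evict all queues of the process identified by
 * @pasid, typically invoked in response to a GPU VM fault. Returns -EINVAL
 * if no process matches the PASID.
 */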
c7b6bac9 1978int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
2640c3fa 1979{
1980 struct kfd_process_device *pdd;
1981 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1982 int ret = 0;
1983
1984 if (!p)
1985 return -EINVAL;
8a491bb3 1986 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
2640c3fa 1987 pdd = kfd_get_process_device_data(dqm->dev, p);
1988 if (pdd)
1989 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
1990 kfd_unref_process(p);
1991
1992 return ret;
1993}
1994
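/* Deferred work that resets the GPU, scheduled when a hardware exception
 * (such as a hung hardware scheduler) is detected.
 */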
73ea648d
SL
1995static void kfd_process_hw_exception(struct work_struct *work)
1996{
1997 struct device_queue_manager *dqm = container_of(work,
1998 struct device_queue_manager, hw_exception_work);
5b87245f 1999 amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
73ea648d
SL
2000}
2001
851a645e
FK
2002#if defined(CONFIG_DEBUG_FS)
2003
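/* Print register dumps as "address: value value ...", up to eight
 * consecutive registers per line, starting a new line whenever the register
 * addresses stop being contiguous.
 */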
2004static void seq_reg_dump(struct seq_file *m,
2005 uint32_t (*dump)[2], uint32_t n_regs)
2006{
2007 uint32_t i, count;
2008
2009 for (i = 0, count = 0; i < n_regs; i++) {
2010 if (count == 0 ||
2011 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
2012 seq_printf(m, "%s %08x: %08x",
2013 i ? "\n" : "",
2014 dump[i][0], dump[i][1]);
2015 count = 7;
2016 } else {
2017 seq_printf(m, " %08x", dump[i][1]);
2018 count--;
2019 }
2020 }
2021
2022 seq_puts(m, "\n");
2023}
2024
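/* debugfs: dump the HQD registers of the HIQ, of every CP queue enabled in
 * cp_queue_bitmap, and of every SDMA RLC queue, via the kfd2kgd dump hooks.
 */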
2025int dqm_debugfs_hqds(struct seq_file *m, void *data)
2026{
2027 struct device_queue_manager *dqm = data;
2028 uint32_t (*dump)[2], n_regs;
2029 int pipe, queue;
2030 int r = 0;
2031
2c99a547
PY
2032 if (!dqm->sched_running) {
 2033 		seq_puts(m, " Device is stopped\n");
2034
2035 return 0;
2036 }
2037
24f48a42 2038 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
14328aa5
PC
2039 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
2040 &dump, &n_regs);
24f48a42
OZ
2041 if (!r) {
2042 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
14328aa5
PC
2043 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
2044 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
2045 KFD_CIK_HIQ_QUEUE);
24f48a42
OZ
2046 seq_reg_dump(m, dump, n_regs);
2047
2048 kfree(dump);
2049 }
2050
851a645e
FK
2051 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
2052 int pipe_offset = pipe * get_queues_per_pipe(dqm);
2053
2054 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
2055 if (!test_bit(pipe_offset + queue,
e6945304 2056 dqm->dev->shared_resources.cp_queue_bitmap))
851a645e
FK
2057 continue;
2058
2059 r = dqm->dev->kfd2kgd->hqd_dump(
2060 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
2061 if (r)
2062 break;
2063
2064 seq_printf(m, " CP Pipe %d, Queue %d\n",
2065 pipe, queue);
2066 seq_reg_dump(m, dump, n_regs);
2067
2068 kfree(dump);
2069 }
2070 }
2071
c7637c95 2072 for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
d5094189
SL
2073 for (queue = 0;
2074 queue < dqm->dev->device_info->num_sdma_queues_per_engine;
2075 queue++) {
851a645e
FK
2076 r = dqm->dev->kfd2kgd->hqd_sdma_dump(
2077 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
2078 if (r)
2079 break;
2080
2081 seq_printf(m, " SDMA Engine %d, RLC %d\n",
2082 pipe, queue);
2083 seq_reg_dump(m, dump, n_regs);
2084
2085 kfree(dump);
2086 }
2087 }
2088
2089 return r;
2090}
2091
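/* debugfs hook: force a full unmap and remap of the runlist by marking the
 * runlist active and re-executing all queues.
 */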
a29ec470
SL
2092int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
2093{
2094 int r = 0;
2095
2096 dqm_lock(dqm);
2097 dqm->active_runlist = true;
2098 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
2099 dqm_unlock(dqm);
2100
2101 return r;
2102}
2103
851a645e 2104#endif