Commit | Line | Data |
---|---|---|
241f24f8 BG |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | * | |
22 | */ | |
23 | ||
24 | #include <linux/slab.h> | |
25 | #include <linux/mutex.h> | |
26 | #include "kfd_device_queue_manager.h" | |
27 | #include "kfd_kernel_queue.h" | |
28 | #include "kfd_priv.h" | |
e1940fa4 | 29 | #include "kfd_pm4_headers_vi.h" |
241f24f8 BG |
30 | #include "kfd_pm4_opcodes.h" |
31 | ||
/*
 * inc_wptr - advance a dword write index by a size given in bytes.
 * @wptr: write index, counted in 32-bit words; updated in place
 * @increment_bytes: amount to advance by, in bytes
 * @buffer_size_bytes: total size of the buffer being filled, in bytes
 *
 * Emits a WARN when the new index, converted back to bytes, lies beyond
 * the buffer size. The index is updated regardless of the warning.
 */
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
			    unsigned int buffer_size_bytes)
{
	unsigned int increment_dwords = increment_bytes / sizeof(uint32_t);
	unsigned int new_wptr = *wptr + increment_dwords;

	WARN(new_wptr * sizeof(uint32_t) > buffer_size_bytes,
	     "Runlist IB overflow");

	*wptr = new_wptr;
}
41 | ||
42 | static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) | |
43 | { | |
44 | union PM4_MES_TYPE_3_HEADER header; | |
45 | ||
507968dd | 46 | header.u32All = 0; |
241f24f8 | 47 | header.opcode = opcode; |
6d566930 | 48 | header.count = packet_size / 4 - 2; |
241f24f8 BG |
49 | header.type = PM4_TYPE_3; |
50 | ||
507968dd | 51 | return header.u32All; |
241f24f8 BG |
52 | } |
53 | ||
54 | static void pm_calc_rlib_size(struct packet_manager *pm, | |
55 | unsigned int *rlib_size, | |
56 | bool *over_subscription) | |
57 | { | |
36582fa5 | 58 | unsigned int process_count, queue_count, compute_queue_count; |
e1940fa4 | 59 | unsigned int map_queue_size; |
a99c6d4f FK |
60 | unsigned int max_proc_per_quantum = 1; |
61 | struct kfd_dev *dev = pm->dqm->dev; | |
241f24f8 | 62 | |
241f24f8 BG |
63 | process_count = pm->dqm->processes_count; |
64 | queue_count = pm->dqm->queue_count; | |
36582fa5 | 65 | compute_queue_count = queue_count - pm->dqm->sdma_queue_count; |
241f24f8 | 66 | |
a99c6d4f FK |
67 | /* check if there is over subscription |
68 | * Note: the arbitration between the number of VMIDs and | |
69 | * hws_max_conc_proc has been done in | |
70 | * kgd2kfd_device_init(). | |
71 | */ | |
241f24f8 | 72 | *over_subscription = false; |
a99c6d4f FK |
73 | |
74 | if (dev->max_proc_per_quantum > 1) | |
75 | max_proc_per_quantum = dev->max_proc_per_quantum; | |
76 | ||
77 | if ((process_count > max_proc_per_quantum) || | |
36582fa5 | 78 | compute_queue_count > get_queues_num(pm->dqm)) { |
241f24f8 | 79 | *over_subscription = true; |
79775b62 | 80 | pr_debug("Over subscribed runlist\n"); |
241f24f8 BG |
81 | } |
82 | ||
507968dd | 83 | map_queue_size = sizeof(struct pm4_mes_map_queues); |
241f24f8 | 84 | /* calculate run list ib allocation size */ |
507968dd | 85 | *rlib_size = process_count * sizeof(struct pm4_mes_map_process) + |
e1940fa4 | 86 | queue_count * map_queue_size; |
241f24f8 BG |
87 | |
88 | /* | |
89 | * Increase the allocation size in case we need a chained run list | |
90 | * when over subscription | |
91 | */ | |
92 | if (*over_subscription) | |
507968dd | 93 | *rlib_size += sizeof(struct pm4_mes_runlist); |
241f24f8 | 94 | |
79775b62 | 95 | pr_debug("runlist ib size %d\n", *rlib_size); |
241f24f8 BG |
96 | } |
97 | ||
98 | static int pm_allocate_runlist_ib(struct packet_manager *pm, | |
99 | unsigned int **rl_buffer, | |
100 | uint64_t *rl_gpu_buffer, | |
101 | unsigned int *rl_buffer_size, | |
102 | bool *is_over_subscription) | |
103 | { | |
104 | int retval; | |
105 | ||
32fa8219 FK |
106 | if (WARN_ON(pm->allocated)) |
107 | return -EINVAL; | |
241f24f8 BG |
108 | |
109 | pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); | |
110 | ||
a86aa3ca OG |
111 | retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, |
112 | &pm->ib_buffer_obj); | |
241f24f8 | 113 | |
4eacc26b | 114 | if (retval) { |
79775b62 | 115 | pr_err("Failed to allocate runlist IB\n"); |
241f24f8 BG |
116 | return retval; |
117 | } | |
118 | ||
119 | *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr; | |
120 | *rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr; | |
121 | ||
122 | memset(*rl_buffer, 0, *rl_buffer_size); | |
123 | pm->allocated = true; | |
124 | return retval; | |
125 | } | |
126 | ||
127 | static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, | |
128 | uint64_t ib, size_t ib_size_in_dwords, bool chain) | |
129 | { | |
507968dd | 130 | struct pm4_mes_runlist *packet; |
a99c6d4f FK |
131 | int concurrent_proc_cnt = 0; |
132 | struct kfd_dev *kfd = pm->dqm->dev; | |
241f24f8 | 133 | |
32fa8219 FK |
134 | if (WARN_ON(!ib)) |
135 | return -EFAULT; | |
241f24f8 | 136 | |
a99c6d4f FK |
137 | /* Determine the number of processes to map together to HW: |
138 | * it can not exceed the number of VMIDs available to the | |
139 | * scheduler, and it is determined by the smaller of the number | |
140 | * of processes in the runlist and kfd module parameter | |
141 | * hws_max_conc_proc. | |
142 | * Note: the arbitration between the number of VMIDs and | |
143 | * hws_max_conc_proc has been done in | |
144 | * kgd2kfd_device_init(). | |
145 | */ | |
146 | concurrent_proc_cnt = min(pm->dqm->processes_count, | |
147 | kfd->max_proc_per_quantum); | |
148 | ||
507968dd | 149 | packet = (struct pm4_mes_runlist *)buffer; |
241f24f8 | 150 | |
507968dd FK |
151 | memset(buffer, 0, sizeof(struct pm4_mes_runlist)); |
152 | packet->header.u32All = build_pm4_header(IT_RUN_LIST, | |
153 | sizeof(struct pm4_mes_runlist)); | |
241f24f8 BG |
154 | |
155 | packet->bitfields4.ib_size = ib_size_in_dwords; | |
156 | packet->bitfields4.chain = chain ? 1 : 0; | |
157 | packet->bitfields4.offload_polling = 0; | |
158 | packet->bitfields4.valid = 1; | |
a99c6d4f | 159 | packet->bitfields4.process_cnt = concurrent_proc_cnt; |
241f24f8 BG |
160 | packet->ordinal2 = lower_32_bits(ib); |
161 | packet->bitfields3.ib_base_hi = upper_32_bits(ib); | |
162 | ||
163 | return 0; | |
164 | } | |
165 | ||
166 | static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, | |
167 | struct qcm_process_device *qpd) | |
168 | { | |
507968dd | 169 | struct pm4_mes_map_process *packet; |
241f24f8 | 170 | |
507968dd | 171 | packet = (struct pm4_mes_map_process *)buffer; |
241f24f8 | 172 | |
507968dd | 173 | memset(buffer, 0, sizeof(struct pm4_mes_map_process)); |
241f24f8 | 174 | |
507968dd FK |
175 | packet->header.u32All = build_pm4_header(IT_MAP_PROCESS, |
176 | sizeof(struct pm4_mes_map_process)); | |
241f24f8 BG |
177 | packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; |
178 | packet->bitfields2.process_quantum = 1; | |
179 | packet->bitfields2.pasid = qpd->pqm->process->pasid; | |
180 | packet->bitfields3.page_table_base = qpd->page_table_base; | |
181 | packet->bitfields10.gds_size = qpd->gds_size; | |
182 | packet->bitfields10.num_gws = qpd->num_gws; | |
183 | packet->bitfields10.num_oac = qpd->num_oac; | |
bc920fd4 | 184 | packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; |
241f24f8 BG |
185 | |
186 | packet->sh_mem_config = qpd->sh_mem_config; | |
187 | packet->sh_mem_bases = qpd->sh_mem_bases; | |
188 | packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; | |
189 | packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; | |
190 | ||
507968dd FK |
191 | /* TODO: scratch support */ |
192 | packet->sh_hidden_private_base_vmid = 0; | |
193 | ||
241f24f8 BG |
194 | packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); |
195 | packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); | |
196 | ||
197 | return 0; | |
198 | } | |
199 | ||
507968dd | 200 | static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, |
d7b8f73e BG |
201 | struct queue *q, bool is_static) |
202 | { | |
203 | struct pm4_mes_map_queues *packet; | |
204 | bool use_static = is_static; | |
205 | ||
d7b8f73e | 206 | packet = (struct pm4_mes_map_queues *)buffer; |
507968dd | 207 | memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); |
d7b8f73e | 208 | |
507968dd FK |
209 | packet->header.u32All = build_pm4_header(IT_MAP_QUEUES, |
210 | sizeof(struct pm4_mes_map_queues)); | |
d7b8f73e BG |
211 | packet->bitfields2.alloc_format = |
212 | alloc_format__mes_map_queues__one_per_pipe_vi; | |
213 | packet->bitfields2.num_queues = 1; | |
214 | packet->bitfields2.queue_sel = | |
215 | queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; | |
216 | ||
217 | packet->bitfields2.engine_sel = | |
218 | engine_sel__mes_map_queues__compute_vi; | |
219 | packet->bitfields2.queue_type = | |
220 | queue_type__mes_map_queues__normal_compute_vi; | |
221 | ||
222 | switch (q->properties.type) { | |
223 | case KFD_QUEUE_TYPE_COMPUTE: | |
224 | if (use_static) | |
225 | packet->bitfields2.queue_type = | |
226 | queue_type__mes_map_queues__normal_latency_static_queue_vi; | |
227 | break; | |
228 | case KFD_QUEUE_TYPE_DIQ: | |
229 | packet->bitfields2.queue_type = | |
230 | queue_type__mes_map_queues__debug_interface_queue_vi; | |
231 | break; | |
232 | case KFD_QUEUE_TYPE_SDMA: | |
e139cd2a | 233 | packet->bitfields2.engine_sel = q->properties.sdma_engine_id + |
d7b8f73e BG |
234 | engine_sel__mes_map_queues__sdma0_vi; |
235 | use_static = false; /* no static queues under SDMA */ | |
236 | break; | |
237 | default: | |
32fa8219 FK |
238 | WARN(1, "queue type %d", q->properties.type); |
239 | return -EINVAL; | |
d7b8f73e BG |
240 | } |
241 | packet->bitfields3.doorbell_offset = | |
242 | q->properties.doorbell_off; | |
243 | ||
244 | packet->mqd_addr_lo = | |
245 | lower_32_bits(q->gart_mqd_addr); | |
246 | ||
247 | packet->mqd_addr_hi = | |
248 | upper_32_bits(q->gart_mqd_addr); | |
249 | ||
250 | packet->wptr_addr_lo = | |
251 | lower_32_bits((uint64_t)q->properties.write_ptr); | |
252 | ||
253 | packet->wptr_addr_hi = | |
254 | upper_32_bits((uint64_t)q->properties.write_ptr); | |
255 | ||
256 | return 0; | |
257 | } | |
258 | ||
241f24f8 BG |
259 | static int pm_create_runlist_ib(struct packet_manager *pm, |
260 | struct list_head *queues, | |
261 | uint64_t *rl_gpu_addr, | |
262 | size_t *rl_size_bytes) | |
263 | { | |
264 | unsigned int alloc_size_bytes; | |
265 | unsigned int *rl_buffer, rl_wptr, i; | |
266 | int retval, proccesses_mapped; | |
267 | struct device_process_node *cur; | |
268 | struct qcm_process_device *qpd; | |
269 | struct queue *q; | |
270 | struct kernel_queue *kq; | |
271 | bool is_over_subscription; | |
272 | ||
241f24f8 BG |
273 | rl_wptr = retval = proccesses_mapped = 0; |
274 | ||
275 | retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, | |
276 | &alloc_size_bytes, &is_over_subscription); | |
4eacc26b | 277 | if (retval) |
241f24f8 BG |
278 | return retval; |
279 | ||
280 | *rl_size_bytes = alloc_size_bytes; | |
851a645e | 281 | pm->ib_size_bytes = alloc_size_bytes; |
241f24f8 | 282 | |
79775b62 | 283 | pr_debug("Building runlist ib process count: %d queues count %d\n", |
241f24f8 BG |
284 | pm->dqm->processes_count, pm->dqm->queue_count); |
285 | ||
286 | /* build the run list ib packet */ | |
287 | list_for_each_entry(cur, queues, list) { | |
288 | qpd = cur->qpd; | |
289 | /* build map process packet */ | |
290 | if (proccesses_mapped >= pm->dqm->processes_count) { | |
79775b62 | 291 | pr_debug("Not enough space left in runlist IB\n"); |
241f24f8 BG |
292 | pm_release_ib(pm); |
293 | return -ENOMEM; | |
294 | } | |
992839ad | 295 | |
241f24f8 | 296 | retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); |
4eacc26b | 297 | if (retval) |
241f24f8 | 298 | return retval; |
992839ad | 299 | |
241f24f8 | 300 | proccesses_mapped++; |
507968dd | 301 | inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process), |
241f24f8 BG |
302 | alloc_size_bytes); |
303 | ||
304 | list_for_each_entry(kq, &qpd->priv_queue_list, list) { | |
991ca8ee | 305 | if (!kq->queue->properties.is_active) |
241f24f8 | 306 | continue; |
992839ad | 307 | |
79775b62 | 308 | pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", |
992839ad YS |
309 | kq->queue->queue, qpd->is_debug); |
310 | ||
507968dd | 311 | retval = pm_create_map_queue(pm, |
d7b8f73e BG |
312 | &rl_buffer[rl_wptr], |
313 | kq->queue, | |
314 | qpd->is_debug); | |
4eacc26b | 315 | if (retval) |
241f24f8 | 316 | return retval; |
992839ad YS |
317 | |
318 | inc_wptr(&rl_wptr, | |
507968dd | 319 | sizeof(struct pm4_mes_map_queues), |
992839ad | 320 | alloc_size_bytes); |
241f24f8 BG |
321 | } |
322 | ||
323 | list_for_each_entry(q, &qpd->queues_list, list) { | |
991ca8ee | 324 | if (!q->properties.is_active) |
241f24f8 | 325 | continue; |
992839ad | 326 | |
79775b62 | 327 | pr_debug("static_queue, mapping user queue %d, is debug status %d\n", |
992839ad YS |
328 | q->queue, qpd->is_debug); |
329 | ||
507968dd | 330 | retval = pm_create_map_queue(pm, |
d7b8f73e BG |
331 | &rl_buffer[rl_wptr], |
332 | q, | |
333 | qpd->is_debug); | |
992839ad | 334 | |
4eacc26b | 335 | if (retval) |
241f24f8 | 336 | return retval; |
992839ad YS |
337 | |
338 | inc_wptr(&rl_wptr, | |
507968dd | 339 | sizeof(struct pm4_mes_map_queues), |
992839ad | 340 | alloc_size_bytes); |
241f24f8 BG |
341 | } |
342 | } | |
343 | ||
79775b62 | 344 | pr_debug("Finished map process and queues to runlist\n"); |
241f24f8 BG |
345 | |
346 | if (is_over_subscription) | |
32fa8219 FK |
347 | retval = pm_create_runlist(pm, &rl_buffer[rl_wptr], |
348 | *rl_gpu_addr, | |
349 | alloc_size_bytes / sizeof(uint32_t), | |
350 | true); | |
241f24f8 BG |
351 | |
352 | for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) | |
353 | pr_debug("0x%2X ", rl_buffer[i]); | |
354 | pr_debug("\n"); | |
355 | ||
32fa8219 | 356 | return retval; |
241f24f8 BG |
357 | } |
358 | ||
552764b6 FK |
359 | /* pm_create_release_mem - Create a RELEASE_MEM packet and return the size |
360 | * of this packet | |
361 | * @gpu_addr - GPU address of the packet. It's a virtual address. | |
362 | * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer | |
363 | * Return - length of the packet | |
364 | */ | |
365 | uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) | |
366 | { | |
367 | struct pm4_mec_release_mem *packet; | |
368 | ||
369 | WARN_ON(!buffer); | |
370 | ||
371 | packet = (struct pm4_mec_release_mem *)buffer; | |
372 | memset(buffer, 0, sizeof(*packet)); | |
373 | ||
374 | packet->header.u32All = build_pm4_header(IT_RELEASE_MEM, | |
375 | sizeof(*packet)); | |
376 | ||
377 | packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; | |
378 | packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; | |
379 | packet->bitfields2.tcl1_action_ena = 1; | |
380 | packet->bitfields2.tc_action_ena = 1; | |
381 | packet->bitfields2.cache_policy = cache_policy___release_mem__lru; | |
382 | packet->bitfields2.atc = 0; | |
383 | ||
384 | packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; | |
385 | packet->bitfields3.int_sel = | |
386 | int_sel___release_mem__send_interrupt_after_write_confirm; | |
387 | ||
388 | packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; | |
389 | packet->address_hi = upper_32_bits(gpu_addr); | |
390 | ||
391 | packet->data_lo = 0; | |
392 | ||
393 | return sizeof(*packet) / sizeof(unsigned int); | |
394 | } | |
395 | ||
241f24f8 BG |
396 | int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) |
397 | { | |
241f24f8 BG |
398 | pm->dqm = dqm; |
399 | mutex_init(&pm->lock); | |
400 | pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); | |
4eacc26b | 401 | if (!pm->priv_queue) { |
241f24f8 BG |
402 | mutex_destroy(&pm->lock); |
403 | return -ENOMEM; | |
404 | } | |
405 | pm->allocated = false; | |
406 | ||
407 | return 0; | |
408 | } | |
409 | ||
410 | void pm_uninit(struct packet_manager *pm) | |
411 | { | |
241f24f8 BG |
412 | mutex_destroy(&pm->lock); |
413 | kernel_queue_uninit(pm->priv_queue); | |
414 | } | |
415 | ||
416 | int pm_send_set_resources(struct packet_manager *pm, | |
417 | struct scheduling_resources *res) | |
418 | { | |
507968dd | 419 | struct pm4_mes_set_resources *packet; |
ab7c1648 | 420 | int retval = 0; |
241f24f8 | 421 | |
241f24f8 | 422 | mutex_lock(&pm->lock); |
443fbd5f | 423 | pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, |
241f24f8 | 424 | sizeof(*packet) / sizeof(uint32_t), |
8eabaf54 | 425 | (unsigned int **)&packet); |
4eacc26b | 426 | if (!packet) { |
79775b62 | 427 | pr_err("Failed to allocate buffer on kernel queue\n"); |
ab7c1648 KR |
428 | retval = -ENOMEM; |
429 | goto out; | |
241f24f8 BG |
430 | } |
431 | ||
507968dd FK |
432 | memset(packet, 0, sizeof(struct pm4_mes_set_resources)); |
433 | packet->header.u32All = build_pm4_header(IT_SET_RESOURCES, | |
434 | sizeof(struct pm4_mes_set_resources)); | |
241f24f8 BG |
435 | |
436 | packet->bitfields2.queue_type = | |
437 | queue_type__mes_set_resources__hsa_interface_queue_hiq; | |
438 | packet->bitfields2.vmid_mask = res->vmid_mask; | |
b90e3fbe | 439 | packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; |
241f24f8 BG |
440 | packet->bitfields7.oac_mask = res->oac_mask; |
441 | packet->bitfields8.gds_heap_base = res->gds_heap_base; | |
442 | packet->bitfields8.gds_heap_size = res->gds_heap_size; | |
443 | ||
444 | packet->gws_mask_lo = lower_32_bits(res->gws_mask); | |
445 | packet->gws_mask_hi = upper_32_bits(res->gws_mask); | |
446 | ||
447 | packet->queue_mask_lo = lower_32_bits(res->queue_mask); | |
448 | packet->queue_mask_hi = upper_32_bits(res->queue_mask); | |
449 | ||
443fbd5f | 450 | pm->priv_queue->ops.submit_packet(pm->priv_queue); |
241f24f8 | 451 | |
ab7c1648 | 452 | out: |
241f24f8 BG |
453 | mutex_unlock(&pm->lock); |
454 | ||
ab7c1648 | 455 | return retval; |
241f24f8 BG |
456 | } |
457 | ||
458 | int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) | |
459 | { | |
460 | uint64_t rl_gpu_ib_addr; | |
461 | uint32_t *rl_buffer; | |
462 | size_t rl_ib_size, packet_size_dwords; | |
463 | int retval; | |
464 | ||
241f24f8 BG |
465 | retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr, |
466 | &rl_ib_size); | |
4eacc26b | 467 | if (retval) |
241f24f8 BG |
468 | goto fail_create_runlist_ib; |
469 | ||
79775b62 | 470 | pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); |
241f24f8 | 471 | |
507968dd | 472 | packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t); |
241f24f8 BG |
473 | mutex_lock(&pm->lock); |
474 | ||
443fbd5f | 475 | retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, |
241f24f8 | 476 | packet_size_dwords, &rl_buffer); |
4eacc26b | 477 | if (retval) |
241f24f8 BG |
478 | goto fail_acquire_packet_buffer; |
479 | ||
480 | retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, | |
481 | rl_ib_size / sizeof(uint32_t), false); | |
4eacc26b | 482 | if (retval) |
241f24f8 BG |
483 | goto fail_create_runlist; |
484 | ||
443fbd5f | 485 | pm->priv_queue->ops.submit_packet(pm->priv_queue); |
241f24f8 BG |
486 | |
487 | mutex_unlock(&pm->lock); | |
488 | ||
489 | return retval; | |
490 | ||
491 | fail_create_runlist: | |
443fbd5f | 492 | pm->priv_queue->ops.rollback_packet(pm->priv_queue); |
241f24f8 BG |
493 | fail_acquire_packet_buffer: |
494 | mutex_unlock(&pm->lock); | |
495 | fail_create_runlist_ib: | |
8eabaf54 | 496 | pm_release_ib(pm); |
241f24f8 BG |
497 | return retval; |
498 | } | |
499 | ||
500 | int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, | |
501 | uint32_t fence_value) | |
502 | { | |
503 | int retval; | |
507968dd | 504 | struct pm4_mes_query_status *packet; |
241f24f8 | 505 | |
32fa8219 FK |
506 | if (WARN_ON(!fence_address)) |
507 | return -EFAULT; | |
241f24f8 BG |
508 | |
509 | mutex_lock(&pm->lock); | |
443fbd5f | 510 | retval = pm->priv_queue->ops.acquire_packet_buffer( |
241f24f8 | 511 | pm->priv_queue, |
507968dd | 512 | sizeof(struct pm4_mes_query_status) / sizeof(uint32_t), |
241f24f8 | 513 | (unsigned int **)&packet); |
4eacc26b | 514 | if (retval) |
241f24f8 BG |
515 | goto fail_acquire_packet_buffer; |
516 | ||
507968dd FK |
517 | packet->header.u32All = build_pm4_header(IT_QUERY_STATUS, |
518 | sizeof(struct pm4_mes_query_status)); | |
241f24f8 BG |
519 | |
520 | packet->bitfields2.context_id = 0; | |
521 | packet->bitfields2.interrupt_sel = | |
522 | interrupt_sel__mes_query_status__completion_status; | |
523 | packet->bitfields2.command = | |
524 | command__mes_query_status__fence_only_after_write_ack; | |
525 | ||
526 | packet->addr_hi = upper_32_bits((uint64_t)fence_address); | |
527 | packet->addr_lo = lower_32_bits((uint64_t)fence_address); | |
528 | packet->data_hi = upper_32_bits((uint64_t)fence_value); | |
529 | packet->data_lo = lower_32_bits((uint64_t)fence_value); | |
530 | ||
443fbd5f | 531 | pm->priv_queue->ops.submit_packet(pm->priv_queue); |
241f24f8 BG |
532 | |
533 | fail_acquire_packet_buffer: | |
534 | mutex_unlock(&pm->lock); | |
535 | return retval; | |
536 | } | |
537 | ||
538 | int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, | |
7da2bcf8 | 539 | enum kfd_unmap_queues_filter filter, |
241f24f8 BG |
540 | uint32_t filter_param, bool reset, |
541 | unsigned int sdma_engine) | |
542 | { | |
543 | int retval; | |
544 | uint32_t *buffer; | |
507968dd | 545 | struct pm4_mes_unmap_queues *packet; |
241f24f8 | 546 | |
241f24f8 | 547 | mutex_lock(&pm->lock); |
443fbd5f | 548 | retval = pm->priv_queue->ops.acquire_packet_buffer( |
241f24f8 | 549 | pm->priv_queue, |
507968dd | 550 | sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t), |
241f24f8 | 551 | &buffer); |
4eacc26b | 552 | if (retval) |
241f24f8 BG |
553 | goto err_acquire_packet_buffer; |
554 | ||
507968dd FK |
555 | packet = (struct pm4_mes_unmap_queues *)buffer; |
556 | memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); | |
7da2bcf8 YZ |
557 | pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n", |
558 | filter, reset, type); | |
507968dd FK |
559 | packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, |
560 | sizeof(struct pm4_mes_unmap_queues)); | |
241f24f8 BG |
561 | switch (type) { |
562 | case KFD_QUEUE_TYPE_COMPUTE: | |
563 | case KFD_QUEUE_TYPE_DIQ: | |
564 | packet->bitfields2.engine_sel = | |
565 | engine_sel__mes_unmap_queues__compute; | |
566 | break; | |
567 | case KFD_QUEUE_TYPE_SDMA: | |
568 | packet->bitfields2.engine_sel = | |
569 | engine_sel__mes_unmap_queues__sdma0 + sdma_engine; | |
570 | break; | |
571 | default: | |
32fa8219 FK |
572 | WARN(1, "queue type %d", type); |
573 | retval = -EINVAL; | |
574 | goto err_invalid; | |
241f24f8 BG |
575 | } |
576 | ||
577 | if (reset) | |
578 | packet->bitfields2.action = | |
579 | action__mes_unmap_queues__reset_queues; | |
580 | else | |
581 | packet->bitfields2.action = | |
582 | action__mes_unmap_queues__preempt_queues; | |
583 | ||
7da2bcf8 YZ |
584 | switch (filter) { |
585 | case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: | |
241f24f8 BG |
586 | packet->bitfields2.queue_sel = |
587 | queue_sel__mes_unmap_queues__perform_request_on_specified_queues; | |
588 | packet->bitfields2.num_queues = 1; | |
589 | packet->bitfields3b.doorbell_offset0 = filter_param; | |
590 | break; | |
7da2bcf8 | 591 | case KFD_UNMAP_QUEUES_FILTER_BY_PASID: |
241f24f8 BG |
592 | packet->bitfields2.queue_sel = |
593 | queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; | |
594 | packet->bitfields3a.pasid = filter_param; | |
595 | break; | |
7da2bcf8 | 596 | case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: |
241f24f8 | 597 | packet->bitfields2.queue_sel = |
507968dd | 598 | queue_sel__mes_unmap_queues__unmap_all_queues; |
241f24f8 | 599 | break; |
7da2bcf8 | 600 | case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: |
992839ad YS |
601 | /* in this case, we do not preempt static queues */ |
602 | packet->bitfields2.queue_sel = | |
507968dd | 603 | queue_sel__mes_unmap_queues__unmap_all_non_static_queues; |
992839ad | 604 | break; |
241f24f8 | 605 | default: |
7da2bcf8 | 606 | WARN(1, "filter %d", filter); |
32fa8219 FK |
607 | retval = -EINVAL; |
608 | goto err_invalid; | |
371d5b65 | 609 | } |
241f24f8 | 610 | |
443fbd5f | 611 | pm->priv_queue->ops.submit_packet(pm->priv_queue); |
241f24f8 | 612 | |
32fa8219 FK |
613 | mutex_unlock(&pm->lock); |
614 | return 0; | |
615 | ||
616 | err_invalid: | |
617 | pm->priv_queue->ops.rollback_packet(pm->priv_queue); | |
241f24f8 BG |
618 | err_acquire_packet_buffer: |
619 | mutex_unlock(&pm->lock); | |
620 | return retval; | |
621 | } | |
622 | ||
623 | void pm_release_ib(struct packet_manager *pm) | |
624 | { | |
241f24f8 BG |
625 | mutex_lock(&pm->lock); |
626 | if (pm->allocated) { | |
a86aa3ca | 627 | kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj); |
241f24f8 BG |
628 | pm->allocated = false; |
629 | } | |
630 | mutex_unlock(&pm->lock); | |
631 | } | |
851a645e FK |
632 | |
633 | #if defined(CONFIG_DEBUG_FS) | |
634 | ||
635 | int pm_debugfs_runlist(struct seq_file *m, void *data) | |
636 | { | |
637 | struct packet_manager *pm = data; | |
638 | ||
639 | mutex_lock(&pm->lock); | |
640 | ||
641 | if (!pm->allocated) { | |
642 | seq_puts(m, " No active runlist\n"); | |
643 | goto out; | |
644 | } | |
645 | ||
646 | seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, | |
647 | pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false); | |
648 | ||
649 | out: | |
650 | mutex_unlock(&pm->lock); | |
651 | return 0; | |
652 | } | |
653 | ||
654 | #endif |