/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef KFD_PRIV_H_INCLUDED
#define KFD_PRIV_H_INCLUDED

#include <linux/hashtable.h>
#include <linux/mmu_notifier.h>
#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
#include <linux/kref.h>
#include <linux/sysfs.h>
#include <linux/device_cgroup.h>
#include <drm/drm_file.h>
#include <drm/drm_drv.h>
#include <drm/drm_device.h>
#include <drm/drm_ioctl.h>
#include <kgd_kfd_interface.h>
#include <linux/swap.h>

#include "amd_shared.h"
#include "amdgpu.h"

#define KFD_MAX_RING_ENTRY_SIZE	8

#define KFD_SYSFS_FILE_MODE 0444

/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16

/* Use upper bits of mmap offset to store KFD driver specific information.
 * BITS[63:62] - Encode MMAP type
 * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs
 * BITS[45:0]  - MMAP offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
 *  defines are w.r.t. PAGE_SIZE
 */
#define KFD_MMAP_TYPE_SHIFT	62
#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_MMIO	(0x0ULL << KFD_MMAP_TYPE_SHIFT)

#define KFD_MMAP_GPU_ID_SHIFT 46
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				<< KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
				& KFD_MMAP_GPU_ID_MASK)
#define KFD_MMAP_GET_GPU_ID(offset)    ((offset & KFD_MMAP_GPU_ID_MASK) \
				>> KFD_MMAP_GPU_ID_SHIFT)
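
/*
 * Illustrative sketch (not part of the driver): composing a doorbell mmap
 * offset for a given gpu_id with the encoding above. User space sees this
 * value, in pages (i.e. shifted right by PAGE_SHIFT), as the mmap offset.
 */
static inline uint64_t kfd_example_doorbell_mmap_offset(uint32_t gpu_id)
{
	return KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(gpu_id);
}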

/*
 * When working with the CP scheduler, the HIQ must be assigned to a fixed
 * HQD slot, either manually or via the amdgpu driver. These are the fixed
 * HIQ HQD slot definitions for Kaveri. On Kaveri only the queues of the
 * first ME participate in CP scheduling, so the HIQ slot is placed in the
 * second ME.
 */
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0

/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct)	\
	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
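
/*
 * Illustrative use of kfd_alloc_struct() (hypothetical caller, not driver
 * code): the pointee type drives both the cast and the allocation size, so
 * the two cannot drift apart:
 *
 *	struct kfd_process_device *pdd = kfd_alloc_struct(pdd);
 */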

#define KFD_MAX_NUM_OF_PROCESSES 512
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024

/*
 * Size of the per-process TBA+TMA buffer: 2 pages
 *
 * The first page is the TBA used for the CWSR ISA code. The second
 * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
 */
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
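
/*
 * Illustrative sketch (assumption, not driver code): given the CWSR buffer
 * mapped at tba_addr, the TMA page follows one page later, per the layout
 * described above.
 */
static inline uint64_t kfd_example_tma_addr(uint64_t tba_addr)
{
	return tba_addr + KFD_CWSR_TMA_OFFSET;
}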

#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
	(KFD_MAX_NUM_OF_PROCESSES *			\
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)

#define KFD_KERNEL_QUEUE_SIZE 2048

#define KFD_UNMAP_LATENCY_MS	(4000)

#define KFD_MAX_SDMA_QUEUES	128

/*
 * 512 = 0x200
 * The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the
 * same SDMA engine on SOC15, which has 8-byte doorbells for SDMA.
 * A distance of 512 8-byte doorbells (i.e. one page away) ensures that the
 * SDMA RLC (2*i+1) doorbells (in terms of the lower 12 address bits) lie
 * exactly within the OFFSET and SIZE programmed in registers like
 * BIF_SDMA0_DOORBELL_RANGE.
 */
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
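
/*
 * Illustrative sketch (assumption, not driver code): locating the mirrored
 * RLC doorbell of SDMA queue (2*i+1) one page away from its (2*i) partner.
 */
static inline unsigned int kfd_example_sdma_mirror_doorbell(unsigned int db)
{
	return db + KFD_QUEUE_DOORBELL_MIRROR_OFFSET;
}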

/**
 * enum kfd_ioctl_flags - KFD ioctl flags
 * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
 * userspace can use a given ioctl.
 */
enum kfd_ioctl_flags {
	/*
	 * @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
	 * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
	 * perform privileged operations and load arbitrary data into MQDs and
	 * eventually HQD registers when the queue is mapped by HWS. In order
	 * to prevent this, additional security checks are performed.
	 *
	 * This is equivalent to callers with the CHECKPOINT_RESTORE capability.
	 *
	 * Note: Since earlier versions of Docker do not support
	 * CHECKPOINT_RESTORE, ioctls from callers with the SYS_ADMIN
	 * capability are also allowed.
	 */
	KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
};
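
/*
 * Illustrative sketch (assumption, not the driver's actual dispatcher): how
 * an ioctl carrying KFD_IOC_FLAG_CHECKPOINT_RESTORE might be gated on the
 * caller's capabilities (assumes <linux/capability.h> is available).
 */
static inline bool kfd_example_ioctl_permitted(unsigned int ioctl_flags)
{
	if ((ioctl_flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
	    !capable(CAP_CHECKPOINT_RESTORE) && !capable(CAP_SYS_ADMIN))
		return false;
	return true;
}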
/*
 * Kernel module parameter to specify maximum number of supported queues per
 * device
 */
extern int max_num_of_queues_per_device;


/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;

/*
 * Kernel module parameter to specify the maximum process
 * number per HW scheduler
 */
extern int hws_max_conc_proc;

extern int cwsr_enable;

/*
 * Kernel module parameter to specify whether to send sigterm to HSA process on
 * unhandled exception
 */
extern int send_sigterm;

/*
 * Kernel module parameter used to simulate a large-BAR machine on machines
 * that are not large-BAR enabled.
 */
extern int debug_largebar;

/*
 * Ignore CRAT table during KFD initialization, can be used to work around
 * broken CRAT tables on some AMD systems
 */
extern int ignore_crat;

/* Set sh_mem_config.retry_disable on GFX v9 */
extern int amdgpu_noretry;

/* Halt if HWS hang is detected */
extern int halt_if_hws_hang;

/* Whether MEC FW supports GWS barriers */
extern bool hws_gws_support;

/* Queue preemption timeout in ms */
extern int queue_preemption_timeout_ms;

/*
 * Don't evict process queues on vm fault
 */
extern int amdgpu_no_queue_eviction_on_vm_fault;

/* Enable eviction debug messages */
extern bool debug_evictions;

extern struct mutex kfd_processes_mutex;

enum cache_policy {
	cache_policy_coherent,
	cache_policy_noncoherent
};

#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0])
#define KFD_IS_SOC15(dev)   ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))
#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
		((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \
		 (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)))
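
/*
 * Illustrative use of the helpers above (hypothetical caller;
 * setup_multi_xcc() is a made-up name for illustration only):
 *
 *	if (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3))
 *		setup_multi_xcc(node);
 */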

struct kfd_node;

struct kfd_event_interrupt_class {
	bool (*interrupt_isr)(struct kfd_node *dev,
			const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
			bool *patched_flag);
	void (*interrupt_wq)(struct kfd_node *dev,
			const uint32_t *ih_ring_entry);
};

struct kfd_device_info {
	uint32_t gfx_target_version;
	const struct kfd_event_interrupt_class *event_interrupt_class;
	unsigned int max_pasid_bits;
	unsigned int max_no_of_hqd;
	unsigned int doorbell_size;
	size_t ih_ring_entry_size;
	uint8_t num_of_watch_points;
	uint16_t mqd_size_aligned;
	bool supports_cwsr;
	bool needs_iommu_device;
	bool needs_pci_atomics;
	uint32_t no_atomic_fw_version;
	unsigned int num_sdma_queues_per_engine;
	unsigned int num_reserved_sdma_queues_per_engine;
	uint64_t reserved_sdma_queues_bitmap;
};

unsigned int kfd_get_num_sdma_engines(struct kfd_node *kdev);
unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *kdev);

struct kfd_mem_obj {
	uint32_t range_start;
	uint32_t range_end;
	uint64_t gpu_addr;
	uint32_t *cpu_ptr;
	void *gtt_mem;
};

struct kfd_vmid_info {
	uint32_t first_vmid_kfd;
	uint32_t last_vmid_kfd;
	uint32_t vmid_num_kfd;
};

#define MAX_KFD_NODES	8

struct kfd_dev;

struct kfd_node {
	unsigned int node_id;
	struct amdgpu_device *adev;	/* Duplicated here along with keeping
					 * a copy in kfd_dev to save a hop
					 */
	const struct kfd2kgd_calls *kfd2kgd; /* Duplicated here along with
					      * keeping a copy in kfd_dev to
					      * save a hop
					      */
	struct kfd_vmid_info vm_info;
	unsigned int id;		/* topology stub index */
	uint32_t xcc_mask;		/* Instance mask of XCCs present */
	struct amdgpu_xcp *xcp;

	/* Interrupts */
	struct kfifo ih_fifo;
	struct workqueue_struct *ih_wq;
	struct work_struct interrupt_work;
	spinlock_t interrupt_lock;

	/*
	 * Interrupts of interest to KFD are copied
	 * from the HW ring into a SW ring.
	 */
	bool interrupts_active;
	uint32_t interrupt_bitmap;	/* Only used for GFX 9.4.3 */

	/* QCM Device instance */
	struct device_queue_manager *dqm;

	/* Global GWS resource shared between processes */
	void *gws;
	bool gws_debug_workaround;

	/* Clients watching SMI events */
	struct list_head smi_clients;
	spinlock_t smi_lock;
	uint32_t reset_seq_num;

	/* SRAM ECC flag */
	atomic_t sram_ecc_flag;

	/* SPM process id */
	unsigned int spm_pasid;

	/* Maximum process number mapped to HW scheduler */
	unsigned int max_proc_per_quantum;

	unsigned int compute_vmid_bitmap;

	struct kfd_local_mem_info local_mem_info;

	struct kfd_dev *kfd;
};

struct kfd_dev {
	struct amdgpu_device *adev;

	struct kfd_device_info device_info;

	phys_addr_t doorbell_base;	/* Start of actual doorbells used by
					 * KFD. It is aligned for mapping
					 * into user mode
					 */
	size_t doorbell_base_dw_offset;	/* Offset from the start of the PCI
					 * doorbell BAR to the first KFD
					 * doorbell in dwords. GFX reserves
					 * the segment before this offset.
					 */
	u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
					   * page used by kernel queue
					   */

	struct kgd2kfd_shared_resources shared_resources;

	const struct kfd2kgd_calls *kfd2kgd;
	struct mutex doorbell_mutex;
	DECLARE_BITMAP(doorbell_available_index,
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	void *gtt_mem;
	uint64_t gtt_start_gpu_addr;
	void *gtt_start_cpu_ptr;
	void *gtt_sa_bitmap;
	struct mutex gtt_sa_lock;
	unsigned int gtt_sa_chunk_size;
	unsigned int gtt_sa_num_of_chunks;

	bool init_complete;

	/* Firmware versions */
	uint16_t mec_fw_version;
	uint16_t mec2_fw_version;
	uint16_t sdma_fw_version;

	/* CWSR */
	bool cwsr_enabled;
	const void *cwsr_isa;
	unsigned int cwsr_isa_size;

	/* xGMI */
	uint64_t hive_id;

	bool pci_atomic_requested;

	/* Use IOMMU v2 flag */
	bool use_iommu_v2;

	/* Compute Profile ref. count */
	atomic_t compute_profile;

	struct ida doorbell_ida;
	unsigned int max_doorbell_slices;

	int noretry;

	struct kfd_node *nodes[MAX_KFD_NODES];
	unsigned int num_nodes;
};

enum kfd_mempool {
	KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
	KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
	KFD_MEMPOOL_FRAMEBUFFER = 3,
};

/* Character device interface */
int kfd_chardev_init(void);
void kfd_chardev_exit(void);

/**
 * enum kfd_unmap_queues_filter - Enum for queue filters.
 *
 * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
 *						running queues list.
 *
 * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Preempts all non-static queues
 *						in the run list.
 *
 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to a
 *						specific process.
 *
 */
enum kfd_unmap_queues_filter {
	KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES = 1,
	KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES = 2,
	KFD_UNMAP_QUEUES_FILTER_BY_PASID = 3
};

/**
 * enum kfd_queue_type - Enum for various queue types.
 *
 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
 *
 * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type.
 *
 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
 *
 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
 *
 * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface.
 */
enum kfd_queue_type  {
	KFD_QUEUE_TYPE_COMPUTE,
	KFD_QUEUE_TYPE_SDMA,
	KFD_QUEUE_TYPE_HIQ,
	KFD_QUEUE_TYPE_DIQ,
	KFD_QUEUE_TYPE_SDMA_XGMI
};

enum kfd_queue_format {
	KFD_QUEUE_FORMAT_PM4,
	KFD_QUEUE_FORMAT_AQL
};

enum KFD_QUEUE_PRIORITY {
	KFD_QUEUE_PRIORITY_MINIMUM = 0,
	KFD_QUEUE_PRIORITY_MAXIMUM = 15
};

/**
 * struct queue_properties
 *
 * @type: The queue type.
 *
 * @queue_id: Queue identifier.
 *
 * @queue_address: Queue ring buffer address.
 *
 * @queue_size: Queue ring buffer size.
 *
 * @priority: Defines the queue priority relative to other queues in the
 * process.
 * This is just an indication and HW scheduling may override the priority as
 * necessary while keeping the relative prioritization.
 * The priority granularity is from 0 to f, where f is the highest priority.
 * Currently all queues are initialized with the highest priority.
 *
 * @queue_percent: This field is partially implemented and currently a zero in
 * this field marks the queue as inactive.
 *
 * @read_ptr: User space address which points to the number of dwords the
 * CP has read from the ring buffer. This field is updated automatically by
 * the H/W.
 *
 * @write_ptr: Defines the number of dwords written to the ring buffer.
 *
 * @doorbell_ptr: Notifies the H/W of a new packet written to the queue ring
 * buffer. This field should be similar to write_ptr and the user should
 * update this field after updating the write_ptr.
 *
 * @doorbell_off: The doorbell offset in the doorbell pci-bar.
 *
 * @is_interop: Defines if this is an interop queue. Interop queue means that
 * the queue can access both graphics and compute resources.
 *
 * @is_evicted: Defines if the queue is evicted. Only active queues
 * are evicted, rendering them inactive.
 *
 * @is_active: Defines if the queue is active or not. @is_active and
 * @is_evicted are protected by the DQM lock.
 *
 * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
 * @is_gws should be protected by the DQM lock, since changing it can yield the
 * possibility of updating DQM state on number of GWS queues.
 *
 * @vmid: If the scheduling mode is no-cp-scheduling, this field defines the
 * vmid of the queue.
 *
 * This structure represents the queue properties for each queue no matter if
 * it's user mode or kernel mode queue.
 *
 */

struct queue_properties {
	enum kfd_queue_type type;
	enum kfd_queue_format format;
	unsigned int queue_id;
	uint64_t queue_address;
	uint64_t queue_size;
	uint32_t priority;
	uint32_t queue_percent;
	uint32_t *read_ptr;
	uint32_t *write_ptr;
	void __iomem *doorbell_ptr;
	uint32_t doorbell_off;
	bool is_interop;
	bool is_evicted;
	bool is_active;
	bool is_gws;
	uint32_t pm4_target_xcc;
	bool is_dbg_wa;
	bool is_user_cu_masked;
	/* Not relevant for user mode queues in cp scheduling */
	unsigned int vmid;
	/* Relevant only for sdma queues*/
	uint32_t sdma_engine_id;
	uint32_t sdma_queue_id;
	uint32_t sdma_vm_addr;
	/* Relevant only for VI */
	uint64_t eop_ring_buffer_address;
	uint32_t eop_ring_buffer_size;
	uint64_t ctx_save_restore_area_address;
	uint32_t ctx_save_restore_area_size;
	uint32_t ctl_stack_size;
	uint64_t tba_addr;
	uint64_t tma_addr;
	uint64_t exception_status;
};

#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 &&	\
			    (q).queue_address != 0 &&	\
			    (q).queue_percent > 0 &&	\
			    !(q).is_evicted)
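
/*
 * Illustrative use (hypothetical caller; add_queue_to_runlist() is a made-up
 * name for illustration only): only queues that satisfy QUEUE_IS_ACTIVE()
 * are worth mapping, e.g.
 *
 *	if (QUEUE_IS_ACTIVE(q->properties))
 *		add_queue_to_runlist(q);
 */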

enum mqd_update_flag {
	UPDATE_FLAG_DBG_WA_ENABLE = 1,
	UPDATE_FLAG_DBG_WA_DISABLE = 2,
};

struct mqd_update_info {
	union {
		struct {
			uint32_t count; /* Must be a multiple of 32 */
			uint32_t *ptr;
		} cu_mask;
	};
	enum mqd_update_flag update_flag;
};

/**
 * struct queue
 *
 * @list: Queue linked list.
 *
 * @mqd: The queue MQD (memory queue descriptor).
 *
 * @mqd_mem_obj: The MQD local gpu memory object.
 *
 * @gart_mqd_addr: The MQD gart mc address.
 *
 * @properties: The queue properties.
 *
 * @mec: Used only in no cp scheduling mode and identifies the micro engine
 * id that the queue should be executed on.
 *
 * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
 * id.
 *
 * @queue: Used only in no cp scheduling mode and identifies the queue's slot.
 *
 * @process: The kfd process that created this queue.
 *
 * @device: The kfd device that created this queue.
 *
 * @gws: Points to the gws kgd_mem if this is a gws control queue; NULL
 * otherwise.
 *
 * This structure represents user mode compute queues.
 * It contains all the necessary data to handle such queues.
 *
 */

struct queue {
	struct list_head list;
	void *mqd;
	struct kfd_mem_obj *mqd_mem_obj;
	uint64_t gart_mqd_addr;
	struct queue_properties properties;

	uint32_t mec;
	uint32_t pipe;
	uint32_t queue;

	unsigned int sdma_id;
	unsigned int doorbell_id;

	struct kfd_process *process;
	struct kfd_node *device;
	void *gws;

	/* procfs */
	struct kobject kobj;

	void *gang_ctx_bo;
	uint64_t gang_ctx_gpu_addr;
	void *gang_ctx_cpu_ptr;

	struct amdgpu_bo *wptr_bo;
};

enum KFD_MQD_TYPE {
	KFD_MQD_TYPE_HIQ = 0,		/* for hiq */
	KFD_MQD_TYPE_CP,		/* for cp queues and diq */
	KFD_MQD_TYPE_SDMA,		/* for sdma queues */
	KFD_MQD_TYPE_DIQ,		/* for diq */
	KFD_MQD_TYPE_MAX
};

enum KFD_PIPE_PRIORITY {
	KFD_PIPE_PRIORITY_CS_LOW = 0,
	KFD_PIPE_PRIORITY_CS_MEDIUM,
	KFD_PIPE_PRIORITY_CS_HIGH
};

struct scheduling_resources {
	unsigned int vmid_mask;
	enum kfd_queue_type type;
	uint64_t queue_mask;
	uint64_t gws_mask;
	uint32_t oac_mask;
	uint32_t gds_heap_base;
	uint32_t gds_heap_size;
};

struct process_queue_manager {
	/* data */
	struct kfd_process *process;
	struct list_head queues;
	unsigned long *queue_slot_bitmap;
};

struct qcm_process_device {
	/* The Device Queue Manager that owns this data */
	struct device_queue_manager *dqm;
	struct process_queue_manager *pqm;
	/* Queues list */
	struct list_head queues_list;
	struct list_head priv_queue_list;

	unsigned int queue_count;
	unsigned int vmid;
	bool is_debug;
	unsigned int evicted; /* eviction counter, 0=active */

	/* This flag tells if we should reset all wavefronts on
	 * process termination
	 */
	bool reset_wavefronts;

	/* This flag tells us if this process has a GWS-capable
	 * queue that will be mapped into the runlist. It's
	 * possible to request a GWS BO, but not have the queue
	 * currently mapped, and this changes how the MAP_PROCESS
	 * PM4 packet is configured.
	 */
	bool mapped_gws_queue;

	/* All the memory management data should be here too */
	uint64_t gds_context_area;
	/* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
	uint64_t page_table_base;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;
	uint32_t sh_mem_ape1_base;
	uint32_t sh_mem_ape1_limit;
	uint32_t gds_size;
	uint32_t num_gws;
	uint32_t num_oac;
	uint32_t sh_hidden_private_base;

	/* CWSR memory */
	struct kgd_mem *cwsr_mem;
	void *cwsr_kaddr;
	uint64_t cwsr_base;
	uint64_t tba_addr;
	uint64_t tma_addr;

	/* IB memory */
	struct kgd_mem *ib_mem;
	uint64_t ib_base;
	void *ib_kaddr;

	/* doorbell resources per process per device */
	unsigned long *doorbell_bitmap;
};

/* KFD Memory Eviction */

/* Approx. wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS 100
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 100
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10

/* 8 byte handle containing GPU ID in the most significant 4 bytes and
 * idr_handle in the least significant 4 bytes
 */
#define MAKE_HANDLE(gpu_id, idr_handle) \
	(((uint64_t)(gpu_id) << 32) + idr_handle)
#define GET_GPU_ID(handle) (handle >> 32)
#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
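
/*
 * Illustrative round trip (not driver code): packing and unpacking a buffer
 * handle with the helpers above.
 */
static inline bool kfd_example_handle_roundtrip(uint32_t gpu_id, uint32_t idr)
{
	uint64_t handle = MAKE_HANDLE(gpu_id, idr);

	return GET_GPU_ID(handle) == gpu_id && GET_IDR_HANDLE(handle) == idr;
}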

enum kfd_pdd_bound {
	PDD_UNBOUND = 0,
	PDD_BOUND,
	PDD_BOUND_SUSPENDED,
};

#define MAX_SYSFS_FILENAME_LEN 15

/*
 * SDMA counter runs at 100MHz frequency.
 * We display SDMA activity in microsecond granularity in sysfs.
 * As a result, the divisor is 100.
 */
#define SDMA_ACTIVITY_DIVISOR  100
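
/*
 * Illustrative conversion (assumption, not driver code): turning a raw SDMA
 * activity counter (100 MHz ticks) into microseconds for sysfs reporting.
 */
static inline uint64_t kfd_example_sdma_activity_us(uint64_t counter)
{
	return counter / SDMA_ACTIVITY_DIVISOR;
}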

/* Data that is per-process-per device. */
struct kfd_process_device {
	/* The device that owns this data. */
	struct kfd_node *dev;

	/* The process that owns this kfd_process_device. */
	struct kfd_process *process;

	/* per-process-per device QCM data structure */
	struct qcm_process_device qpd;

	/*Apertures*/
	uint64_t lds_base;
	uint64_t lds_limit;
	uint64_t gpuvm_base;
	uint64_t gpuvm_limit;
	uint64_t scratch_base;
	uint64_t scratch_limit;

	/* VM context for GPUVM allocations */
	struct file *drm_file;
	void *drm_priv;
	atomic64_t tlb_seq;

	/* GPUVM allocations storage */
	struct idr alloc_idr;

	/* Flag used to tell whether the pdd has been dequeued from the dqm.
	 * This is used to prevent dev->dqm->ops.process_termination() from
	 * being called twice when it is already called from the IOMMU
	 * callback function.
	 */
	bool already_dequeued;
	bool runtime_inuse;

	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
	enum kfd_pdd_bound bound;

	/* VRAM usage */
	uint64_t vram_usage;
	struct attribute attr_vram;
	char vram_filename[MAX_SYSFS_FILENAME_LEN];

	/* SDMA activity tracking */
	uint64_t sdma_past_activity_counter;
	struct attribute attr_sdma;
	char sdma_filename[MAX_SYSFS_FILENAME_LEN];

	/* Eviction activity tracking */
	uint64_t last_evict_timestamp;
	atomic64_t evict_duration_counter;
	struct attribute attr_evict;

	struct kobject *kobj_stats;
	unsigned int doorbell_index;

	/*
	 * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
	 * that is associated with the device encoded by "this" struct
	 * instance. The value reflects CU usage by all of the waves launched
	 * by this process on this device. A very important property of the
	 * occupancy parameter is that its value is a snapshot of current use.
	 *
	 * Following is to be noted regarding how this parameter is reported:
	 *
	 * The number of waves that a CU can launch is limited by a couple of
	 * parameters. These are encoded by the struct amdgpu_cu_info instance
	 * that is part of every device definition. For GFX9 devices this
	 * translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves
	 * do not use scratch memory and 32 waves (max_scratch_slots_per_cu)
	 * when they do use scratch memory. This could change for future
	 * devices and therefore this example should be considered as a guide.
	 *
	 * All CUs of a device are available for the process. This may not be
	 * true under certain conditions - e.g. CU masking.
	 *
	 * Finally, the number of CUs that are occupied by a process is
	 * affected by both the number of CUs a device has and the number of
	 * other competing processes.
	 */
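	/*
	 * Worked example (illustrative, GFX9-class assumptions): with 4 SIMDs
	 * per CU and 10 waves per SIMD, a fully occupied CU runs 4 * 10 = 40
	 * waves when no scratch is used, and is capped at 32 waves
	 * (max_scratch_slots_per_cu) when scratch is in use.
	 */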
	struct attribute attr_cu_occupancy;

	/* sysfs counters for GPU retry fault and page migration tracking */
	struct kobject *kobj_counters;
	struct attribute attr_faults;
	struct attribute attr_page_in;
	struct attribute attr_page_out;
	uint64_t faults;
	uint64_t page_in;
	uint64_t page_out;

	/* Exception code status*/
	uint64_t exception_status;
	void *vm_fault_exc_data;
	size_t vm_fault_exc_data_size;

	/* Tracks debug per-vmid request settings */
	uint32_t spi_dbg_override;
	uint32_t spi_dbg_launch_mode;
	uint32_t watch_points[4];

	/*
	 * If this process has been checkpointed before, then the user
	 * application will use the original gpu_id on the
	 * checkpointed node to refer to this device.
	 */
	uint32_t user_gpu_id;

	void *proc_ctx_bo;
	uint64_t proc_ctx_gpu_addr;
	void *proc_ctx_cpu_ptr;
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)

struct svm_range_list {
	struct mutex lock;
	struct rb_root_cached objects;
	struct list_head list;
	struct work_struct deferred_list_work;
	struct list_head deferred_range_list;
	struct list_head criu_svm_metadata_list;
	spinlock_t deferred_list_lock;
	atomic_t evicted_ranges;
	atomic_t drain_pagefaults;
	struct delayed_work restore_work;
	DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
	struct task_struct *faulting_task;
};

/* Process data */
struct kfd_process {
	/*
	 * kfd_process are stored in an mm_struct*->kfd_process*
	 * hash table (kfd_processes in kfd_process.c)
	 */
	struct hlist_node kfd_processes;

	/*
	 * Opaque pointer to mm_struct. We don't hold a reference to
	 * it so it should never be dereferenced from here. This is
	 * only used for looking up processes by their mm.
	 */
	void *mm;

	struct kref ref;
	struct work_struct release_work;

	struct mutex mutex;

	/*
	 * In any process, the thread that started main() is the lead
	 * thread and outlives the rest.
	 * It is here because amd_iommu_bind_pasid wants a task_struct.
	 * It can also be used for safely getting a reference to the
	 * mm_struct of the process.
	 */
	struct task_struct *lead_thread;

	/* We want to receive a notification when the mm_struct is destroyed */
	struct mmu_notifier mmu_notifier;

	u32 pasid;

	/*
	 * Array of kfd_process_device pointers,
	 * one for each device the process is using.
	 */
	struct kfd_process_device *pdds[MAX_GPU_INSTANCE];
	uint32_t n_pdds;

	struct process_queue_manager pqm;

	/*Is the user space process 32 bit?*/
	bool is_32bit_user_mode;

	/* Event-related data */
	struct mutex event_mutex;
	/* Event ID allocator and lookup */
	struct idr event_idr;
	/* Event page */
	u64 signal_handle;
	struct kfd_signal_page *signal_page;
	size_t signal_mapped_size;
	size_t signal_event_count;
	bool signal_event_limit_reached;

	/* Information used for memory eviction */
	void *kgd_process_info;
	/* Eviction fence that is attached to all the BOs of this process. The
	 * fence will be triggered during eviction and a new one will be
	 * created during restore.
	 */
	struct dma_fence *ef;

	/* Work items for evicting and restoring BOs */
	struct delayed_work eviction_work;
	struct delayed_work restore_work;
	/* seqno of the last scheduled eviction */
	unsigned int last_eviction_seqno;
	/* Approx. the last timestamp (in jiffies) when the process was
	 * restored after an eviction
	 */
	unsigned long last_restore_timestamp;

	/* Indicates the process is debug attached with a reserved vmid. */
	bool debug_trap_enabled;

	/* per-process-per device debug event fd file */
	struct file *dbg_ev_file;

	/* If the process is a kfd debugger, we need to know so we can clean
	 * up at exit time. If a process enables debugging on itself, it does
	 * its own clean-up, so we don't set the flag here. We track this by
	 * counting the number of processes this process is debugging.
	 */
	atomic_t debugged_process_count;

	/* If the process is being debugged, this is the debugger process */
	struct kfd_process *debugger_process;

	/* Kobj for our procfs */
	struct kobject *kobj;
	struct kobject *kobj_queues;
	struct attribute attr_pasid;

	/* Keep track of cwsr init */
	bool has_cwsr;

	/* Exception code enable mask and status */
	uint64_t exception_enable_mask;
	uint64_t exception_status;

	/* shared virtual memory registered by this process */
	struct svm_range_list svms;

	bool xnack_enabled;

	/* Work area for debugger event writer worker. */
	struct work_struct debug_event_workarea;

	/* Tracks debug per-vmid request for debug flags */
	bool dbg_flags;

	atomic_t poison;
	/* Queues are in paused state because we are in the process of doing a CRIU checkpoint */
	bool queues_paused;

	/* Tracks runtime enable status */
	struct semaphore runtime_enable_sema;
	bool is_runtime_retry;
	struct kfd_runtime_info runtime_info;

};

#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;

/**
 * typedef amdkfd_ioctl_t - typedef for ioctl function pointer.
 *
 * @filep: pointer to file structure.
 * @p: amdkfd process pointer.
 * @data: pointer to arg that was copied from user.
 *
 * Return: returns ioctl completion code.
 */
typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
				void *data);

struct amdkfd_ioctl_desc {
	unsigned int cmd;
	int flags;
	amdkfd_ioctl_t *func;
	unsigned int cmd_drv;
	const char *name;
};
bool kfd_dev_is_large_bar(struct kfd_node *dev);

int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
void kfd_cleanup_processes(void);
struct kfd_process *kfd_create_process(struct task_struct *thread);
struct kfd_process *kfd_get_process(const struct task_struct *task);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);

int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
int kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
				uint32_t *gpuid, uint32_t *gpuidx);
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
				uint32_t gpuidx, uint32_t *gpuid) {
	return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
}
static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
				struct kfd_process *p, uint32_t gpuidx) {
	return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
}

void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);

struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process,
							 uint32_t gpu_id);

int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id);

int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
						       struct kfd_process *p);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
						       struct kfd_process *p);
struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
						       struct kfd_process *p);

bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);

int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
			  struct vm_area_struct *vma);

/* KFD process API for creating and translating handles */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem);
void *kfd_process_device_translate_handle(struct kfd_process_device *p,
					int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle);
struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);

/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
bool kfd_set_pasid_limit(unsigned int new_limit);
unsigned int kfd_get_pasid_limit(void);
u32 kfd_pasid_alloc(void);
void kfd_pasid_free(u32 pasid);

/* Doorbells */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
		      struct vm_area_struct *vma);
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
					unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
					struct kfd_process_device *pdd,
					unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
				unsigned int *doorbell_index);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
				unsigned int doorbell_index);
/* GTT Sub-Allocator */

int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size,
			struct kfd_mem_obj **mem_obj);

int kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj);

extern struct device *kfd_device;

/* KFD's procfs */
void kfd_procfs_init(void);
void kfd_procfs_shutdown(void);
int kfd_procfs_add_queue(struct queue *q);
void kfd_procfs_del_queue(struct queue *q);

/* Topology */
int kfd_topology_init(void);
void kfd_topology_shutdown(void);
int kfd_topology_add_device(struct kfd_node *gpu);
int kfd_topology_remove_device(struct kfd_node *gpu);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
						uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
						uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_node *kfd_device_by_id(uint32_t gpu_id);
struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev);
static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t node_id,
					uint32_t vmid)
{
	return (node->interrupt_bitmap & (1 << node_id)) != 0 &&
	       (node->compute_vmid_bitmap & (1 << vmid)) != 0;
}
static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
					uint32_t node_id, uint32_t vmid) {
	struct kfd_dev *dev = adev->kfd.dev;
	uint32_t i;

	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
		return dev->nodes[0];

	for (i = 0; i < dev->num_nodes; i++)
		if (kfd_irq_is_from_node(dev->nodes[i], node_id, vmid))
			return dev->nodes[i];

	return NULL;
}
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);

/* Interrupts */
int kfd_interrupt_init(struct kfd_node *dev);
void kfd_interrupt_exit(struct kfd_node *dev);
bool enqueue_ih_ring_entry(struct kfd_node *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_node *dev,
				const uint32_t *ih_ring_entry,
				uint32_t *patched_ihre, bool *flag);

/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);

void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
				  uint64_t tba_addr,
				  uint64_t tma_addr);
void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd,
				     bool enabled);

/* CWSR initialization */
int kfd_process_init_cwsr_apu(struct kfd_process *process, struct file *filep);

/* CRIU */
/*
 * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private
 * structures:
 * kfd_criu_process_priv_data
 * kfd_criu_device_priv_data
 * kfd_criu_bo_priv_data
 * kfd_criu_queue_priv_data
 * kfd_criu_event_priv_data
 * kfd_criu_svm_range_priv_data
 */

#define KFD_CRIU_PRIV_VERSION 1

struct kfd_criu_process_priv_data {
	uint32_t version;
	uint32_t xnack_mode;
};

struct kfd_criu_device_priv_data {
	/* For future use */
	uint64_t reserved;
};

struct kfd_criu_bo_priv_data {
	uint64_t user_addr;
	uint32_t idr_handle;
	uint32_t mapped_gpuids[MAX_GPU_INSTANCE];
};

/*
 * The first 4 bytes of kfd_criu_queue_priv_data, kfd_criu_event_priv_data,
 * kfd_criu_svm_range_priv_data is the object type
 */
enum kfd_criu_object_type {
	KFD_CRIU_OBJECT_TYPE_QUEUE,
	KFD_CRIU_OBJECT_TYPE_EVENT,
	KFD_CRIU_OBJECT_TYPE_SVM_RANGE,
};

struct kfd_criu_svm_range_priv_data {
	uint32_t object_type;
	uint64_t start_addr;
	uint64_t size;
	/* Variable length array of attributes */
	struct kfd_ioctl_svm_attribute attrs[];
};

struct kfd_criu_queue_priv_data {
	uint32_t object_type;
	uint64_t q_address;
	uint64_t q_size;
	uint64_t read_ptr_addr;
	uint64_t write_ptr_addr;
	uint64_t doorbell_off;
	uint64_t eop_ring_buffer_address;
	uint64_t ctx_save_restore_area_address;
	uint32_t gpu_id;
	uint32_t type;
	uint32_t format;
	uint32_t q_id;
	uint32_t priority;
	uint32_t q_percent;
	uint32_t doorbell_id;
	uint32_t gws;
	uint32_t sdma_id;
	uint32_t eop_ring_buffer_size;
	uint32_t ctx_save_restore_area_size;
	uint32_t ctl_stack_size;
	uint32_t mqd_size;
};

struct kfd_criu_event_priv_data {
	uint32_t object_type;
	uint64_t user_handle;
	uint32_t event_id;
	uint32_t auto_reset;
	uint32_t type;
	uint32_t signaled;

	union {
		struct kfd_hsa_memory_exception_data memory_exception_data;
		struct kfd_hsa_hw_exception_data hw_exception_data;
	};
};

int kfd_process_get_queue_info(struct kfd_process *p,
			       uint32_t *num_queues,
			       uint64_t *priv_data_sizes);

int kfd_criu_checkpoint_queues(struct kfd_process *p,
			 uint8_t __user *user_priv_data,
			 uint64_t *priv_data_offset);

int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_data,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size);

int kfd_criu_checkpoint_events(struct kfd_process *p,
			 uint8_t __user *user_priv_data,
			 uint64_t *priv_data_offset);

int kfd_criu_restore_event(struct file *devkfd,
			   struct kfd_process *p,
			   uint8_t __user *user_priv_data,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size);
/* CRIU - End */

/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);

struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
		struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
		struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
		struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
		struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
		struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
		struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
		struct kfd_node *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
					enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid);

/* Process Queue Manager */
struct process_queue_node {
	struct queue *q;
	struct kernel_queue *kq;
	struct list_head process_queue_list;
};

void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
			struct kfd_node *dev,
			struct file *f,
			struct queue_properties *properties,
			unsigned int *qid,
			struct amdgpu_bo *wptr_bo,
			const struct kfd_criu_queue_priv_data *q_data,
			const void *restore_mqd,
			const void *restore_ctl_stack,
			uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid,
			struct queue_properties *p);
int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
			struct mqd_update_info *minfo);
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
			void *gws);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
						unsigned int qid);
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
						unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,
		       unsigned int qid,
		       void __user *ctl_stack,
		       u32 *ctl_stack_used_size,
		       u32 *save_area_used_size);

int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
			      uint64_t fence_value,
			      unsigned int timeout_ms);

int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
				  unsigned int qid,
				  u32 *mqd_size,
				  u32 *ctl_stack_size);
/* Packet Manager */

#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT   (10)

struct packet_manager {
	struct device_queue_manager *dqm;
	struct kernel_queue *priv_queue;
	struct mutex lock;
	bool allocated;
	struct kfd_mem_obj *ib_buffer_obj;
	unsigned int ib_size_bytes;
	bool is_over_subscription;

	const struct packet_manager_funcs *pmf;
};

struct packet_manager_funcs {
	/* Support ASIC-specific packet formats for PM4 packets */
	int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
			struct qcm_process_device *qpd);
	int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain);
	int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
			struct scheduling_resources *res);
	int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
			struct queue *q, bool is_static);
	int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
			enum kfd_unmap_queues_filter mode,
			uint32_t filter_param, bool reset);
	int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
			uint32_t grace_period);
	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t fence_address, uint64_t fence_value);
	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);

	/* Packet sizes */
	int map_process_size;
	int runlist_size;
	int set_resources_size;
	int map_queues_size;
	int unmap_queues_size;
	int set_grace_period_size;
	int query_status_size;
	int release_mem_size;
};
1395 | ||
f6e27ff1 | 1396 | extern const struct packet_manager_funcs kfd_vi_pm_funcs; |
454150b1 | 1397 | extern const struct packet_manager_funcs kfd_v9_pm_funcs; |
fd6a440e | 1398 | extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs; |
f6e27ff1 | 1399 | |
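The three extern tables above are per-ASIC implementations of the packet_manager_funcs vtable. A hedged sketch of how a packet manager might be bound to one of them and then build a runlist packet through the vtable; the selection flags and their meaning are assumptions for the example, not the driver's actual ASIC detection.

/* Illustrative only: bind a pm to an ASIC function table, then use it. */
static int example_pm_vtable(struct packet_manager *pm, bool is_aldebaran,
			     bool is_gfx9_or_later, uint32_t *buffer,
			     uint64_t ib, size_t ib_size_dw)
{
	if (is_aldebaran)
		pm->pmf = &kfd_aldebaran_pm_funcs;
	else if (is_gfx9_or_later)
		pm->pmf = &kfd_v9_pm_funcs;
	else
		pm->pmf = &kfd_vi_pm_funcs;

	/* Each builder has a matching *_size field so callers can reserve
	 * space on the kernel queue before writing the packet. */
	return pm->pmf->runlist(pm, buffer, ib, ib_size_dw, false /* chain */);
}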
64c7f8cf | 1400 | int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); |
c2a77fde | 1401 | void pm_uninit(struct packet_manager *pm, bool hanging); |
64c7f8cf BG |
1402 | int pm_send_set_resources(struct packet_manager *pm, |
1403 | struct scheduling_resources *res); | |
1404 | int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues); | |
1405 | int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, | |
b010affe | 1406 | uint64_t fence_value); |
64c7f8cf | 1407 | |
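The KFD_FENCE_INIT/KFD_FENCE_COMPLETED markers above suggest a simple handshake: the CPU seeds a fence location, asks the scheduler to write the completion value via a query-status packet, then polls for it. The sketch below reflects that reading; the 500 ms timeout and the assumption that fence_addr maps the same memory as fence_gpu_addr are illustrative, not taken from the driver.

/* Sketch of the fence handshake implied by the constants above. */
static int example_wait_for_scheduler(struct packet_manager *pm,
				      uint64_t *fence_addr,
				      uint64_t fence_gpu_addr)
{
	int ret;

	*fence_addr = KFD_FENCE_INIT;

	ret = pm_send_query_status(pm, fence_gpu_addr, KFD_FENCE_COMPLETED);
	if (ret)
		return ret;

	/* Poll until the packet processor writes the completion value. */
	return amdkfd_fence_wait_timeout(fence_addr, KFD_FENCE_COMPLETED,
					 500 /* illustrative timeout_ms */);
}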
d2cb0b21 | 1408 | int pm_send_unmap_queue(struct packet_manager *pm, |
7da2bcf8 | 1409 | enum kfd_unmap_queues_filter mode, |
d2cb0b21 | 1410 | uint32_t filter_param, bool reset); |
64c7f8cf | 1411 | |
241f24f8 BG |
1412 | void pm_release_ib(struct packet_manager *pm); |
1413 | ||
7cee6a68 JK |
1414 | int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period); |
1415 | ||
454150b1 FK |
1416 | /* The following PM functions can be shared between VI and AI */ |
1417 | unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); | |
454150b1 | 1418 | |
19f6d2a6 | 1419 | uint64_t kfd_get_number_elems(struct kfd_dev *kfd); |
19f6d2a6 | 1420 | |
f3a39818 AL |
1421 | /* Events */ |
1422 | extern const struct kfd_event_interrupt_class event_interrupt_class_cik; | |
ca750681 | 1423 | extern const struct kfd_event_interrupt_class event_interrupt_class_v9; |
cc009e61 | 1424 | extern const struct kfd_event_interrupt_class event_interrupt_class_v11; |
ca750681 | 1425 | |
930c5ff4 | 1426 | extern const struct kfd_device_global_init_class device_global_init_class_cik; |
f3a39818 | 1427 | |
c3eb12df | 1428 | int kfd_event_init_process(struct kfd_process *p); |
f3a39818 AL |
1429 | void kfd_event_free_process(struct kfd_process *p); |
1430 | int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); | |
1431 | int kfd_wait_on_events(struct kfd_process *p, | |
59d3e8be | 1432 | uint32_t num_events, void __user *data, |
bea9a56a | 1433 | bool all, uint32_t *user_timeout_ms, |
fdf0c833 | 1434 | uint32_t *wait_result); |
c7b6bac9 | 1435 | void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, |
f3a39818 | 1436 | uint32_t valid_id_bits); |
8dc1db31 | 1437 | void kfd_signal_iommu_event(struct kfd_node *dev, |
c7b6bac9 FY |
1438 | u32 pasid, unsigned long address, |
1439 | bool is_write_requested, bool is_execute_requested); | |
1440 | void kfd_signal_hw_exception_event(u32 pasid); | |
f3a39818 AL |
1441 | int kfd_set_event(struct kfd_process *p, uint32_t event_id); |
1442 | int kfd_reset_event(struct kfd_process *p, uint32_t event_id); | |
40e8a766 DYS |
1443 | int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset); |
1444 | ||
f3a39818 AL |
1445 | int kfd_event_create(struct file *devkfd, struct kfd_process *p, |
1446 | uint32_t event_type, bool auto_reset, uint32_t node_id, | |
1447 | uint32_t *event_id, uint32_t *event_trigger_data, | |
1448 | uint64_t *event_page_offset, uint32_t *event_slot_index); | |
40e8a766 DYS |
1449 | |
1450 | int kfd_get_num_events(struct kfd_process *p); | |
f3a39818 AL |
1451 | int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); |
1452 | ||
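A hedged sketch of how the event API above fits together: create an event, set it, wait on it, destroy it. KFD_IOC_EVENT_SIGNAL comes from the uapi header kfd_ioctl.h; the timeout value and the shape of the __user wait data are assumptions, and in practice the set/wait sides run in different contexts.

/* Sketch only: one event round trip built from the declarations above. */
static int example_event_round_trip(struct file *devkfd, struct kfd_process *p,
				    void __user *wait_data)
{
	uint32_t event_id, trigger_data, slot, wait_result;
	uint64_t page_offset;
	uint32_t timeout_ms = 1000;	/* illustrative */
	int ret;

	ret = kfd_event_create(devkfd, p, KFD_IOC_EVENT_SIGNAL,
			       true /* auto_reset */, 0 /* node_id */,
			       &event_id, &trigger_data, &page_offset, &slot);
	if (ret)
		return ret;

	/* Normally another thread or an interrupt handler signals it. */
	kfd_set_event(p, event_id);

	ret = kfd_wait_on_events(p, 1, wait_data, true /* all */,
				 &timeout_ms, &wait_result);

	kfd_event_destroy(p, event_id);
	return ret;
}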
8dc1db31 | 1453 | void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid, |
c2d2588c JK |
1454 | struct kfd_vm_fault_info *info, |
1455 | struct kfd_hsa_memory_exception_data *data); | |
2640c3fa | 1456 | |
8dc1db31 | 1457 | void kfd_signal_reset_event(struct kfd_node *dev); |
e42051d2 | 1458 | |
8dc1db31 | 1459 | void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); |
e2b1f9f5 | 1460 | |
3543b055 | 1461 | void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); |
403575c4 | 1462 | |
459ccca5 LY |
1463 | static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) |
1464 | { | |
75dda67c PY |
1465 | return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || |
1466 | KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || | |
1467 | (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || | |
459ccca5 LY |
1468 | KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); |
1469 | } | |
1470 | ||
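The inline helper above gates a TLB flush after unmapping to the GPU versions that need it. A sketch of the intended call pattern follows; TLB_FLUSH_HEAVYWEIGHT is assumed to be a value of enum TLB_FLUSH_TYPE defined elsewhere in the driver, and the caller shown here is hypothetical.

/* Sketch: flush after an unmap only on parts that require it. */
static void example_unmap_flush(struct kfd_process_device *pdd,
				struct kfd_dev *dev)
{
	/* ... range already unmapped from the GPU page tables ... */

	if (kfd_flush_tlb_after_unmap(dev))
		kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
}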
c2d2588c JK |
1471 | int kfd_send_exception_to_runtime(struct kfd_process *p, |
1472 | unsigned int queue_id, | |
1473 | uint64_t error_reason); | |
e42051d2 SL |
1474 | bool kfd_is_locked(void); |
1475 | ||
f756e631 | 1476 | /* Compute profile */ |
8dc1db31 MJ |
1477 | void kfd_inc_compute_active(struct kfd_node *dev); |
1478 | void kfd_dec_compute_active(struct kfd_node *dev); | |
f756e631 | 1479 | |
6b855f7b HK |
1480 | /* Cgroup Support */ |
1481 | /* Check with the device cgroup whether the @kfd device is accessible */ |
8dc1db31 | 1482 | static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd) |
6b855f7b | 1483 | { |
eec8fd02 | 1484 | #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) |
d69a3b76 | 1485 | struct drm_device *ddev = adev_to_drm(kfd->adev); |
6b855f7b | 1486 | |
99c7b309 | 1487 | return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, |
6b855f7b HK |
1488 | ddev->render->index, |
1489 | DEVCG_ACC_WRITE | DEVCG_ACC_READ); | |
1490 | #else | |
1491 | return 0; | |
1492 | #endif | |
1493 | } | |
1494 | ||
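A hedged example of how an entry point might use the cgroup check above before touching the device; the caller name is made up, and the -EPERM comment reflects the usual devcgroup denial rather than a guarantee from this header.

/* Illustrative caller: bail out early if the device cgroup denies access. */
static int example_gated_entry(struct kfd_node *node)
{
	int ret = kfd_devcgroup_check_permission(node);

	if (ret)
		return ret;	/* typically -EPERM propagated to user space */

	/* ... proceed with the open/ioctl work ... */
	return 0;
}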
74c5b85d MJ |
1495 | static inline bool kfd_is_first_node(struct kfd_node *node) |
1496 | { | |
1497 | return (node == node->kfd->nodes[0]); | |
1498 | } | |
1499 | ||
851a645e FK |
1500 | /* Debugfs */ |
1501 | #if defined(CONFIG_DEBUG_FS) | |
1502 | ||
1503 | void kfd_debugfs_init(void); | |
1504 | void kfd_debugfs_fini(void); | |
1505 | int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data); | |
1506 | int pqm_debugfs_mqds(struct seq_file *m, void *data); | |
1507 | int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data); | |
1508 | int dqm_debugfs_hqds(struct seq_file *m, void *data); | |
1509 | int kfd_debugfs_rls_by_device(struct seq_file *m, void *data); | |
1510 | int pm_debugfs_runlist(struct seq_file *m, void *data); | |
1511 | ||
8dc1db31 | 1512 | int kfd_debugfs_hang_hws(struct kfd_node *dev); |
a29ec470 | 1513 | int pm_debugfs_hang_hws(struct packet_manager *pm); |
4f942aae | 1514 | int dqm_debugfs_hang_hws(struct device_queue_manager *dqm); |
a29ec470 | 1515 | |
851a645e FK |
1516 | #else |
1517 | ||
1518 | static inline void kfd_debugfs_init(void) {} | |
1519 | static inline void kfd_debugfs_fini(void) {} | |
1520 | ||
1521 | #endif | |
1522 | ||
4a488a7a | 1523 | #endif |