Commit | Line | Data |
---|---|---|
d5a114a6 FK |
1 | /* |
2 | * Copyright 2014-2018 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | ||
23 | #define pr_fmt(fmt) "kfd2kgd: " fmt | |
24 | ||
25 | #include <linux/module.h> | |
26 | #include <linux/fdtable.h> | |
27 | #include <linux/uaccess.h> | |
28 | #include <linux/firmware.h> | |
29 | #include <drm/drmP.h> | |
30 | #include "amdgpu.h" | |
31 | #include "amdgpu_amdkfd.h" | |
32 | #include "amdgpu_ucode.h" | |
33 | #include "soc15_hw_ip.h" | |
34 | #include "gc/gc_9_0_offset.h" | |
35 | #include "gc/gc_9_0_sh_mask.h" | |
36 | #include "vega10_enum.h" | |
37 | #include "sdma0/sdma0_4_0_offset.h" | |
38 | #include "sdma0/sdma0_4_0_sh_mask.h" | |
39 | #include "sdma1/sdma1_4_0_offset.h" | |
40 | #include "sdma1/sdma1_4_0_sh_mask.h" | |
41 | #include "athub/athub_1_0_offset.h" | |
42 | #include "athub/athub_1_0_sh_mask.h" | |
43 | #include "oss/osssys_4_0_offset.h" | |
44 | #include "oss/osssys_4_0_sh_mask.h" | |
45 | #include "soc15_common.h" | |
46 | #include "v9_structs.h" | |
47 | #include "soc15.h" | |
48 | #include "soc15d.h" | |
49 | ||
/*
 * HACK: MMHUB and GC both have VM-related registers with the same
 * names but different offsets. The MMHUB registers needed by this file
 * are defined here with an MMHUB prefix. A proper solution would be to
 * move the functions programming these registers into gfx_v9_0.c and
 * mmhub_v1_0.c respectively.
 */

/* Invalidation engine 16: request / acknowledge */
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ				0x06f3
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX		0
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK				0x0705
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX		0

/* VM context 0: page table base address */
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32		0x072b
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX	0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32		0x072c
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX	0

/* VM context 0: page table start address */
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32		0x074b
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX	0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32		0x074c
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX	0

/* VM context 0: page table end address */
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32		0x076b
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX	0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32		0x076c
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX	0

/* Invalidation engine 16: address range */
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32		0x0727
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX	0
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32		0x0728
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX	0
81 | ||
/* Pipe and queue counts for the v9 MEC, as given by the macro names. */
#define V9_PIPE_PER_MEC		(4)
#define V9_QUEUES_PER_PIPE_MEC	(8)
84 | ||
/*
 * Request codes written to CP_HQD_DEQUEUE_REQUEST by kgd_hqd_destroy().
 * The numeric values are written to the register as-is.
 */
enum hqd_dequeue_request_type {
	NO_ACTION   = 0,
	DRAIN_PIPE  = 1,
	RESET_WAVES = 2,
};
90 | ||
91 | /* | |
92 | * Register access functions | |
93 | */ | |
94 | ||
95 | static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, | |
96 | uint32_t sh_mem_config, | |
97 | uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, | |
98 | uint32_t sh_mem_bases); | |
99 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | |
100 | unsigned int vmid); | |
101 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); | |
102 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | |
103 | uint32_t queue_id, uint32_t __user *wptr, | |
104 | uint32_t wptr_shift, uint32_t wptr_mask, | |
105 | struct mm_struct *mm); | |
106 | static int kgd_hqd_dump(struct kgd_dev *kgd, | |
107 | uint32_t pipe_id, uint32_t queue_id, | |
108 | uint32_t (**dump)[2], uint32_t *n_regs); | |
109 | static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, | |
110 | uint32_t __user *wptr, struct mm_struct *mm); | |
111 | static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, | |
112 | uint32_t engine_id, uint32_t queue_id, | |
113 | uint32_t (**dump)[2], uint32_t *n_regs); | |
114 | static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, | |
115 | uint32_t pipe_id, uint32_t queue_id); | |
116 | static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); | |
117 | static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, | |
118 | enum kfd_preempt_type reset_type, | |
119 | unsigned int utimeout, uint32_t pipe_id, | |
120 | uint32_t queue_id); | |
121 | static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, | |
122 | unsigned int utimeout); | |
123 | static int kgd_address_watch_disable(struct kgd_dev *kgd); | |
124 | static int kgd_address_watch_execute(struct kgd_dev *kgd, | |
125 | unsigned int watch_point_id, | |
126 | uint32_t cntl_val, | |
127 | uint32_t addr_hi, | |
128 | uint32_t addr_lo); | |
129 | static int kgd_wave_control_execute(struct kgd_dev *kgd, | |
130 | uint32_t gfx_index_val, | |
131 | uint32_t sq_cmd); | |
132 | static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, | |
133 | unsigned int watch_point_id, | |
134 | unsigned int reg_offset); | |
135 | ||
136 | static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, | |
137 | uint8_t vmid); | |
138 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | |
139 | uint8_t vmid); | |
140 | static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | |
141 | uint32_t page_table_base); | |
142 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); | |
143 | static void set_scratch_backing_va(struct kgd_dev *kgd, | |
144 | uint64_t va, uint32_t vmid); | |
145 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); | |
146 | static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); | |
147 | ||
148 | /* Because of REG_GET_FIELD() being used, we put this function in the | |
149 | * asic specific file. | |
150 | */ | |
151 | static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, | |
152 | struct tile_config *config) | |
153 | { | |
154 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
155 | ||
156 | config->gb_addr_config = adev->gfx.config.gb_addr_config; | |
157 | ||
158 | config->tile_config_ptr = adev->gfx.config.tile_mode_array; | |
159 | config->num_tile_configs = | |
160 | ARRAY_SIZE(adev->gfx.config.tile_mode_array); | |
161 | config->macro_tile_config_ptr = | |
162 | adev->gfx.config.macrotile_mode_array; | |
163 | config->num_macro_tile_configs = | |
164 | ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); | |
165 | ||
166 | return 0; | |
167 | } | |
168 | ||
169 | static const struct kfd2kgd_calls kfd2kgd = { | |
170 | .init_gtt_mem_allocation = alloc_gtt_mem, | |
171 | .free_gtt_mem = free_gtt_mem, | |
172 | .get_local_mem_info = get_local_mem_info, | |
173 | .get_gpu_clock_counter = get_gpu_clock_counter, | |
174 | .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, | |
175 | .alloc_pasid = amdgpu_pasid_alloc, | |
176 | .free_pasid = amdgpu_pasid_free, | |
177 | .program_sh_mem_settings = kgd_program_sh_mem_settings, | |
178 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, | |
179 | .init_interrupts = kgd_init_interrupts, | |
180 | .hqd_load = kgd_hqd_load, | |
181 | .hqd_sdma_load = kgd_hqd_sdma_load, | |
182 | .hqd_dump = kgd_hqd_dump, | |
183 | .hqd_sdma_dump = kgd_hqd_sdma_dump, | |
184 | .hqd_is_occupied = kgd_hqd_is_occupied, | |
185 | .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, | |
186 | .hqd_destroy = kgd_hqd_destroy, | |
187 | .hqd_sdma_destroy = kgd_hqd_sdma_destroy, | |
188 | .address_watch_disable = kgd_address_watch_disable, | |
189 | .address_watch_execute = kgd_address_watch_execute, | |
190 | .wave_control_execute = kgd_wave_control_execute, | |
191 | .address_watch_get_offset = kgd_address_watch_get_offset, | |
192 | .get_atc_vmid_pasid_mapping_pasid = | |
193 | get_atc_vmid_pasid_mapping_pasid, | |
194 | .get_atc_vmid_pasid_mapping_valid = | |
195 | get_atc_vmid_pasid_mapping_valid, | |
196 | .get_fw_version = get_fw_version, | |
197 | .set_scratch_backing_va = set_scratch_backing_va, | |
198 | .get_tile_config = amdgpu_amdkfd_get_tile_config, | |
199 | .get_cu_info = get_cu_info, | |
200 | .get_vram_usage = amdgpu_amdkfd_get_vram_usage, | |
201 | .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, | |
202 | .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, | |
203 | .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, | |
204 | .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, | |
205 | .set_vm_context_page_table_base = set_vm_context_page_table_base, | |
206 | .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, | |
207 | .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, | |
208 | .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, | |
209 | .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, | |
210 | .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, | |
211 | .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, | |
212 | .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, | |
213 | .invalidate_tlbs = invalidate_tlbs, | |
214 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, | |
215 | .submit_ib = amdgpu_amdkfd_submit_ib, | |
216 | }; | |
217 | ||
218 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) | |
219 | { | |
220 | return (struct kfd2kgd_calls *)&kfd2kgd; | |
221 | } | |
222 | ||
/* Reinterpret the opaque kgd_dev handle as the amdgpu device pointer. */
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev;
}
227 | ||
228 | static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, | |
229 | uint32_t queue, uint32_t vmid) | |
230 | { | |
231 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
232 | ||
233 | mutex_lock(&adev->srbm_mutex); | |
234 | soc15_grbm_select(adev, mec, pipe, queue, vmid); | |
235 | } | |
236 | ||
237 | static void unlock_srbm(struct kgd_dev *kgd) | |
238 | { | |
239 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
240 | ||
241 | soc15_grbm_select(adev, 0, 0, 0, 0); | |
242 | mutex_unlock(&adev->srbm_mutex); | |
243 | } | |
244 | ||
245 | static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, | |
246 | uint32_t queue_id) | |
247 | { | |
248 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
249 | ||
250 | uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | |
251 | uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | |
252 | ||
253 | lock_srbm(kgd, mec, pipe, queue_id, 0); | |
254 | } | |
255 | ||
256 | static uint32_t get_queue_mask(struct amdgpu_device *adev, | |
257 | uint32_t pipe_id, uint32_t queue_id) | |
258 | { | |
259 | unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + | |
260 | queue_id) & 31; | |
261 | ||
262 | return ((uint32_t)1) << bit; | |
263 | } | |
264 | ||
/* Undo acquire_queue(): reset the selection and drop the SRBM mutex. */
static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}
269 | ||
270 | static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, | |
271 | uint32_t sh_mem_config, | |
272 | uint32_t sh_mem_ape1_base, | |
273 | uint32_t sh_mem_ape1_limit, | |
274 | uint32_t sh_mem_bases) | |
275 | { | |
276 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
277 | ||
278 | lock_srbm(kgd, 0, 0, 0, vmid); | |
279 | ||
280 | WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); | |
281 | WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); | |
282 | /* APE1 no longer exists on GFX9 */ | |
283 | ||
284 | unlock_srbm(kgd); | |
285 | } | |
286 | ||
287 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | |
288 | unsigned int vmid) | |
289 | { | |
290 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
291 | ||
292 | /* | |
293 | * We have to assume that there is no outstanding mapping. | |
294 | * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because | |
295 | * a mapping is in progress or because a mapping finished | |
296 | * and the SW cleared it. | |
297 | * So the protocol is to always wait & clear. | |
298 | */ | |
299 | uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | | |
300 | ATC_VMID0_PASID_MAPPING__VALID_MASK; | |
301 | ||
302 | /* | |
303 | * need to do this twice, once for gfx and once for mmhub | |
304 | * for ATC add 16 to VMID for mmhub, for IH different registers. | |
305 | * ATC_VMID0..15 registers are separate from ATC_VMID16..31. | |
306 | */ | |
307 | ||
308 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, | |
309 | pasid_mapping); | |
310 | ||
311 | while (!(RREG32(SOC15_REG_OFFSET( | |
312 | ATHUB, 0, | |
313 | mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & | |
314 | (1U << vmid))) | |
315 | cpu_relax(); | |
316 | ||
317 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, | |
318 | mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), | |
319 | 1U << vmid); | |
320 | ||
321 | /* Mapping vmid to pasid also for IH block */ | |
322 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, | |
323 | pasid_mapping); | |
324 | ||
325 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid, | |
326 | pasid_mapping); | |
327 | ||
328 | while (!(RREG32(SOC15_REG_OFFSET( | |
329 | ATHUB, 0, | |
330 | mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & | |
331 | (1U << (vmid + 16)))) | |
332 | cpu_relax(); | |
333 | ||
334 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, | |
335 | mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), | |
336 | 1U << (vmid + 16)); | |
337 | ||
338 | /* Mapping vmid to pasid also for IH block */ | |
339 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, | |
340 | pasid_mapping); | |
341 | return 0; | |
342 | } | |
343 | ||
344 | /* TODO - RING0 form of field is obsolete, seems to date back to SI | |
345 | * but still works | |
346 | */ | |
347 | ||
348 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) | |
349 | { | |
350 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
351 | uint32_t mec; | |
352 | uint32_t pipe; | |
353 | ||
354 | mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | |
355 | pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | |
356 | ||
357 | lock_srbm(kgd, mec, pipe, 0, 0); | |
358 | ||
359 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), | |
360 | CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | | |
361 | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); | |
362 | ||
363 | unlock_srbm(kgd); | |
364 | ||
365 | return 0; | |
366 | } | |
367 | ||
368 | static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, | |
369 | unsigned int engine_id, | |
370 | unsigned int queue_id) | |
371 | { | |
372 | uint32_t base[2] = { | |
373 | SOC15_REG_OFFSET(SDMA0, 0, | |
374 | mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, | |
375 | SOC15_REG_OFFSET(SDMA1, 0, | |
376 | mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL | |
377 | }; | |
378 | uint32_t retval; | |
379 | ||
380 | retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - | |
381 | mmSDMA0_RLC0_RB_CNTL); | |
382 | ||
383 | pr_debug("sdma base address: 0x%x\n", retval); | |
384 | ||
385 | return retval; | |
386 | } | |
387 | ||
/* View an opaque MQD pointer as a v9 compute MQD. */
static inline struct v9_mqd *get_mqd(void *mqd)
{
	struct v9_mqd *typed = mqd;

	return typed;
}
392 | ||
/* View an opaque MQD pointer as a v9 SDMA MQD. */
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	struct v9_sdma_mqd *typed = mqd;

	return typed;
}
397 | ||
398 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | |
399 | uint32_t queue_id, uint32_t __user *wptr, | |
400 | uint32_t wptr_shift, uint32_t wptr_mask, | |
401 | struct mm_struct *mm) | |
402 | { | |
403 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
404 | struct v9_mqd *m; | |
405 | uint32_t *mqd_hqd; | |
406 | uint32_t reg, hqd_base, data; | |
407 | ||
408 | m = get_mqd(mqd); | |
409 | ||
410 | acquire_queue(kgd, pipe_id, queue_id); | |
411 | ||
412 | /* HIQ is set during driver init period with vmid set to 0*/ | |
413 | if (m->cp_hqd_vmid == 0) { | |
414 | uint32_t value, mec, pipe; | |
415 | ||
416 | mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | |
417 | pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | |
418 | ||
419 | pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", | |
420 | mec, pipe, queue_id); | |
421 | value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); | |
422 | value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, | |
423 | ((mec << 5) | (pipe << 3) | queue_id | 0x80)); | |
424 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); | |
425 | } | |
426 | ||
427 | /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ | |
428 | mqd_hqd = &m->cp_mqd_base_addr_lo; | |
429 | hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); | |
430 | ||
431 | for (reg = hqd_base; | |
432 | reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) | |
433 | WREG32(reg, mqd_hqd[reg - hqd_base]); | |
434 | ||
435 | ||
436 | /* Activate doorbell logic before triggering WPTR poll. */ | |
437 | data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, | |
438 | CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); | |
439 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); | |
440 | ||
441 | if (wptr) { | |
442 | /* Don't read wptr with get_user because the user | |
443 | * context may not be accessible (if this function | |
444 | * runs in a work queue). Instead trigger a one-shot | |
445 | * polling read from memory in the CP. This assumes | |
446 | * that wptr is GPU-accessible in the queue's VMID via | |
447 | * ATC or SVM. WPTR==RPTR before starting the poll so | |
448 | * the CP starts fetching new commands from the right | |
449 | * place. | |
450 | * | |
451 | * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit | |
452 | * tricky. Assume that the queue didn't overflow. The | |
453 | * number of valid bits in the 32-bit RPTR depends on | |
454 | * the queue size. The remaining bits are taken from | |
455 | * the saved 64-bit WPTR. If the WPTR wrapped, add the | |
456 | * queue size. | |
457 | */ | |
458 | uint32_t queue_size = | |
459 | 2 << REG_GET_FIELD(m->cp_hqd_pq_control, | |
460 | CP_HQD_PQ_CONTROL, QUEUE_SIZE); | |
461 | uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); | |
462 | ||
463 | if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) | |
464 | guessed_wptr += queue_size; | |
465 | guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); | |
466 | guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; | |
467 | ||
468 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), | |
469 | lower_32_bits(guessed_wptr)); | |
470 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), | |
471 | upper_32_bits(guessed_wptr)); | |
472 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), | |
ebe1d22b | 473 | lower_32_bits((uintptr_t)wptr)); |
d5a114a6 | 474 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), |
ebe1d22b | 475 | upper_32_bits((uintptr_t)wptr)); |
d5a114a6 FK |
476 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), |
477 | get_queue_mask(adev, pipe_id, queue_id)); | |
478 | } | |
479 | ||
480 | /* Start the EOP fetcher */ | |
481 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), | |
482 | REG_SET_FIELD(m->cp_hqd_eop_rptr, | |
483 | CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); | |
484 | ||
485 | data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); | |
486 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); | |
487 | ||
488 | release_queue(kgd); | |
489 | ||
490 | return 0; | |
491 | } | |
492 | ||
493 | static int kgd_hqd_dump(struct kgd_dev *kgd, | |
494 | uint32_t pipe_id, uint32_t queue_id, | |
495 | uint32_t (**dump)[2], uint32_t *n_regs) | |
496 | { | |
497 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
498 | uint32_t i = 0, reg; | |
499 | #define HQD_N_REGS 56 | |
500 | #define DUMP_REG(addr) do { \ | |
501 | if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ | |
502 | break; \ | |
503 | (*dump)[i][0] = (addr) << 2; \ | |
504 | (*dump)[i++][1] = RREG32(addr); \ | |
505 | } while (0) | |
506 | ||
507 | *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); | |
508 | if (*dump == NULL) | |
509 | return -ENOMEM; | |
510 | ||
511 | acquire_queue(kgd, pipe_id, queue_id); | |
512 | ||
513 | for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); | |
514 | reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) | |
515 | DUMP_REG(reg); | |
516 | ||
517 | release_queue(kgd); | |
518 | ||
519 | WARN_ON_ONCE(i != HQD_N_REGS); | |
520 | *n_regs = i; | |
521 | ||
522 | return 0; | |
523 | } | |
524 | ||
525 | static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, | |
526 | uint32_t __user *wptr, struct mm_struct *mm) | |
527 | { | |
528 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
529 | struct v9_sdma_mqd *m; | |
530 | uint32_t sdma_base_addr, sdmax_gfx_context_cntl; | |
531 | unsigned long end_jiffies; | |
532 | uint32_t data; | |
533 | uint64_t data64; | |
534 | uint64_t __user *wptr64 = (uint64_t __user *)wptr; | |
535 | ||
536 | m = get_sdma_mqd(mqd); | |
537 | sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, | |
538 | m->sdma_queue_id); | |
539 | sdmax_gfx_context_cntl = m->sdma_engine_id ? | |
540 | SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : | |
541 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); | |
542 | ||
543 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, | |
544 | m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); | |
545 | ||
546 | end_jiffies = msecs_to_jiffies(2000) + jiffies; | |
547 | while (true) { | |
548 | data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); | |
549 | if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) | |
550 | break; | |
551 | if (time_after(jiffies, end_jiffies)) | |
552 | return -ETIME; | |
553 | usleep_range(500, 1000); | |
554 | } | |
555 | data = RREG32(sdmax_gfx_context_cntl); | |
556 | data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, | |
557 | RESUME_CTX, 0); | |
558 | WREG32(sdmax_gfx_context_cntl, data); | |
559 | ||
560 | WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, | |
561 | m->sdmax_rlcx_doorbell_offset); | |
562 | ||
563 | data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, | |
564 | ENABLE, 1); | |
565 | WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); | |
566 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); | |
567 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, | |
568 | m->sdmax_rlcx_rb_rptr_hi); | |
569 | ||
570 | WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); | |
571 | if (read_user_wptr(mm, wptr64, data64)) { | |
572 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, | |
573 | lower_32_bits(data64)); | |
574 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, | |
575 | upper_32_bits(data64)); | |
576 | } else { | |
577 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, | |
578 | m->sdmax_rlcx_rb_rptr); | |
579 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, | |
580 | m->sdmax_rlcx_rb_rptr_hi); | |
581 | } | |
582 | WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); | |
583 | ||
584 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); | |
585 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, | |
586 | m->sdmax_rlcx_rb_base_hi); | |
587 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, | |
588 | m->sdmax_rlcx_rb_rptr_addr_lo); | |
589 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, | |
590 | m->sdmax_rlcx_rb_rptr_addr_hi); | |
591 | ||
592 | data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, | |
593 | RB_ENABLE, 1); | |
594 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); | |
595 | ||
596 | return 0; | |
597 | } | |
598 | ||
599 | static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, | |
600 | uint32_t engine_id, uint32_t queue_id, | |
601 | uint32_t (**dump)[2], uint32_t *n_regs) | |
602 | { | |
603 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
604 | uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); | |
605 | uint32_t i = 0, reg; | |
606 | #undef HQD_N_REGS | |
607 | #define HQD_N_REGS (19+6+7+10) | |
608 | ||
609 | *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); | |
610 | if (*dump == NULL) | |
611 | return -ENOMEM; | |
612 | ||
613 | for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) | |
614 | DUMP_REG(sdma_base_addr + reg); | |
615 | for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) | |
616 | DUMP_REG(sdma_base_addr + reg); | |
617 | for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; | |
618 | reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) | |
619 | DUMP_REG(sdma_base_addr + reg); | |
620 | for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; | |
621 | reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) | |
622 | DUMP_REG(sdma_base_addr + reg); | |
623 | ||
624 | WARN_ON_ONCE(i != HQD_N_REGS); | |
625 | *n_regs = i; | |
626 | ||
627 | return 0; | |
628 | } | |
629 | ||
630 | static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, | |
631 | uint32_t pipe_id, uint32_t queue_id) | |
632 | { | |
633 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
634 | uint32_t act; | |
635 | bool retval = false; | |
636 | uint32_t low, high; | |
637 | ||
638 | acquire_queue(kgd, pipe_id, queue_id); | |
639 | act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); | |
640 | if (act) { | |
641 | low = lower_32_bits(queue_address >> 8); | |
642 | high = upper_32_bits(queue_address >> 8); | |
643 | ||
644 | if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && | |
645 | high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) | |
646 | retval = true; | |
647 | } | |
648 | release_queue(kgd); | |
649 | return retval; | |
650 | } | |
651 | ||
652 | static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) | |
653 | { | |
654 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
655 | struct v9_sdma_mqd *m; | |
656 | uint32_t sdma_base_addr; | |
657 | uint32_t sdma_rlc_rb_cntl; | |
658 | ||
659 | m = get_sdma_mqd(mqd); | |
660 | sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, | |
661 | m->sdma_queue_id); | |
662 | ||
663 | sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); | |
664 | ||
665 | if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) | |
666 | return true; | |
667 | ||
668 | return false; | |
669 | } | |
670 | ||
671 | static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, | |
672 | enum kfd_preempt_type reset_type, | |
673 | unsigned int utimeout, uint32_t pipe_id, | |
674 | uint32_t queue_id) | |
675 | { | |
676 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
677 | enum hqd_dequeue_request_type type; | |
678 | unsigned long end_jiffies; | |
679 | uint32_t temp; | |
680 | struct v9_mqd *m = get_mqd(mqd); | |
681 | ||
682 | acquire_queue(kgd, pipe_id, queue_id); | |
683 | ||
684 | if (m->cp_hqd_vmid == 0) | |
685 | WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); | |
686 | ||
687 | switch (reset_type) { | |
688 | case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: | |
689 | type = DRAIN_PIPE; | |
690 | break; | |
691 | case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: | |
692 | type = RESET_WAVES; | |
693 | break; | |
694 | default: | |
695 | type = DRAIN_PIPE; | |
696 | break; | |
697 | } | |
698 | ||
699 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); | |
700 | ||
701 | end_jiffies = (utimeout * HZ / 1000) + jiffies; | |
702 | while (true) { | |
703 | temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); | |
704 | if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) | |
705 | break; | |
706 | if (time_after(jiffies, end_jiffies)) { | |
707 | pr_err("cp queue preemption time out.\n"); | |
708 | release_queue(kgd); | |
709 | return -ETIME; | |
710 | } | |
711 | usleep_range(500, 1000); | |
712 | } | |
713 | ||
714 | release_queue(kgd); | |
715 | return 0; | |
716 | } | |
717 | ||
718 | static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, | |
719 | unsigned int utimeout) | |
720 | { | |
721 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
722 | struct v9_sdma_mqd *m; | |
723 | uint32_t sdma_base_addr; | |
724 | uint32_t temp; | |
725 | unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; | |
726 | ||
727 | m = get_sdma_mqd(mqd); | |
728 | sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, | |
729 | m->sdma_queue_id); | |
730 | ||
731 | temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); | |
732 | temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; | |
733 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); | |
734 | ||
735 | while (true) { | |
736 | temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); | |
737 | if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) | |
738 | break; | |
739 | if (time_after(jiffies, end_jiffies)) | |
740 | return -ETIME; | |
741 | usleep_range(500, 1000); | |
742 | } | |
743 | ||
744 | WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); | |
745 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, | |
746 | RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | | |
747 | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); | |
748 | ||
749 | m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); | |
750 | m->sdmax_rlcx_rb_rptr_hi = | |
751 | RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); | |
752 | ||
753 | return 0; | |
754 | } | |
755 | ||
756 | static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, | |
757 | uint8_t vmid) | |
758 | { | |
759 | uint32_t reg; | |
760 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | |
761 | ||
762 | reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) | |
763 | + vmid); | |
764 | return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; | |
765 | } | |
766 | ||
767 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | |
768 | uint8_t vmid) | |
769 | { | |
770 | uint32_t reg; | |
771 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | |
772 | ||
773 | reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) | |
774 | + vmid); | |
775 | return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; | |
776 | } | |
777 | ||
778 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) | |
779 | { | |
780 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | |
781 | uint32_t req = (1 << vmid) | | |
782 | (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ | |
783 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | | |
784 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | | |
785 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | | |
786 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | | |
787 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; | |
788 | ||
789 | mutex_lock(&adev->srbm_mutex); | |
790 | ||
791 | /* Use legacy mode tlb invalidation. | |
792 | * | |
793 | * Currently on Raven the code below is broken for anything but | |
794 | * legacy mode due to a MMHUB power gating problem. A workaround | |
795 | * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ | |
796 | * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack | |
797 | * bit. | |
798 | * | |
799 | * TODO 1: agree on the right set of invalidation registers for | |
800 | * KFD use. Use the last one for now. Invalidate both GC and | |
801 | * MMHUB. | |
802 | * | |
803 | * TODO 2: support range-based invalidation, requires kfg2kgd | |
804 | * interface change | |
805 | */ | |
806 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), | |
807 | 0xffffffff); | |
808 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), | |
809 | 0x0000001f); | |
810 | ||
811 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | |
812 | mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), | |
813 | 0xffffffff); | |
814 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | |
815 | mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), | |
816 | 0x0000001f); | |
817 | ||
818 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); | |
819 | ||
820 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), | |
821 | req); | |
822 | ||
823 | while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & | |
824 | (1 << vmid))) | |
825 | cpu_relax(); | |
826 | ||
827 | while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, | |
828 | mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & | |
829 | (1 << vmid))) | |
830 | cpu_relax(); | |
831 | ||
832 | mutex_unlock(&adev->srbm_mutex); | |
833 | ||
834 | } | |
835 | ||
836 | static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) | |
837 | { | |
838 | signed long r; | |
839 | uint32_t seq; | |
840 | struct amdgpu_ring *ring = &adev->gfx.kiq.ring; | |
841 | ||
842 | spin_lock(&adev->gfx.kiq.ring_lock); | |
843 | amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ | |
844 | amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); | |
845 | amdgpu_ring_write(ring, | |
846 | PACKET3_INVALIDATE_TLBS_DST_SEL(1) | | |
847 | PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | | |
848 | PACKET3_INVALIDATE_TLBS_PASID(pasid) | | |
849 | PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ | |
850 | amdgpu_fence_emit_polling(ring, &seq); | |
851 | amdgpu_ring_commit(ring); | |
852 | spin_unlock(&adev->gfx.kiq.ring_lock); | |
853 | ||
854 | r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); | |
855 | if (r < 1) { | |
856 | DRM_ERROR("wait for kiq fence error: %ld.\n", r); | |
857 | return -ETIME; | |
858 | } | |
859 | ||
860 | return 0; | |
861 | } | |
862 | ||
863 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) | |
864 | { | |
865 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | |
866 | int vmid; | |
867 | struct amdgpu_ring *ring = &adev->gfx.kiq.ring; | |
868 | ||
869 | if (ring->ready) | |
870 | return invalidate_tlbs_with_kiq(adev, pasid); | |
871 | ||
872 | for (vmid = 0; vmid < 16; vmid++) { | |
873 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) | |
874 | continue; | |
875 | if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { | |
876 | if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) | |
877 | == pasid) { | |
878 | write_vmid_invalidate_request(kgd, vmid); | |
879 | break; | |
880 | } | |
881 | } | |
882 | } | |
883 | ||
884 | return 0; | |
885 | } | |
886 | ||
/*
 * Invalidate the TLBs of a single @vmid via the register-based path.
 *
 * A VMID that amdgpu_amdkfd_is_kfd_vmid() rejects is only logged and is
 * not invalidated. The function returns 0 in both cases.
 */
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
		write_vmid_invalidate_request(kgd, vmid);
	else
		pr_err("non kfd vmid %d\n", vmid);

	return 0;
}
899 | ||
/*
 * Address-watch disable hook. This implementation is a stub: it touches
 * no hardware state and always returns 0.
 */
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	/* Nothing to do; @kgd is intentionally unused */
	return 0;
}
904 | ||
/*
 * Address-watch execute hook. This implementation is a stub: all
 * arguments are ignored and 0 is always returned.
 */
static int kgd_address_watch_execute(struct kgd_dev *kgd,
				     unsigned int watch_point_id,
				     uint32_t cntl_val,
				     uint32_t addr_hi,
				     uint32_t addr_lo)
{
	/* Nothing to do; no register is programmed here */
	return 0;
}
913 | ||
914 | static int kgd_wave_control_execute(struct kgd_dev *kgd, | |
915 | uint32_t gfx_index_val, | |
916 | uint32_t sq_cmd) | |
917 | { | |
918 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
919 | uint32_t data = 0; | |
920 | ||
921 | mutex_lock(&adev->grbm_idx_mutex); | |
922 | ||
923 | WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); | |
924 | WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); | |
925 | ||
926 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, | |
927 | INSTANCE_BROADCAST_WRITES, 1); | |
928 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, | |
929 | SH_BROADCAST_WRITES, 1); | |
930 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, | |
931 | SE_BROADCAST_WRITES, 1); | |
932 | ||
933 | WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); | |
934 | mutex_unlock(&adev->grbm_idx_mutex); | |
935 | ||
936 | return 0; | |
937 | } | |
938 | ||
/*
 * Address-watch register offset lookup. This implementation is a stub:
 * both selectors are ignored and 0 is always returned.
 */
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					     unsigned int watch_point_id,
					     unsigned int reg_offset)
{
	/* No lookup is performed here */
	return 0;
}
945 | ||
/*
 * Scratch backing VA hook; intentionally empty in this file.
 *
 * No longer needed on GFXv9: the scratch base address is passed to the
 * shader by the CP, and setting it up is the user mode driver's
 * responsibility.
 */
static void set_scratch_backing_va(struct kgd_dev *kgd,
				   uint64_t va, uint32_t vmid)
{
	/* Deliberately a no-op */
}
954 | ||
955 | /* FIXME: Does this need to be ASIC-specific code? */ | |
956 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) | |
957 | { | |
958 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | |
959 | const union amdgpu_firmware_header *hdr; | |
960 | ||
961 | switch (type) { | |
962 | case KGD_ENGINE_PFP: | |
963 | hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; | |
964 | break; | |
965 | ||
966 | case KGD_ENGINE_ME: | |
967 | hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; | |
968 | break; | |
969 | ||
970 | case KGD_ENGINE_CE: | |
971 | hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; | |
972 | break; | |
973 | ||
974 | case KGD_ENGINE_MEC1: | |
975 | hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; | |
976 | break; | |
977 | ||
978 | case KGD_ENGINE_MEC2: | |
979 | hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; | |
980 | break; | |
981 | ||
982 | case KGD_ENGINE_RLC: | |
983 | hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; | |
984 | break; | |
985 | ||
986 | case KGD_ENGINE_SDMA1: | |
987 | hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; | |
988 | break; | |
989 | ||
990 | case KGD_ENGINE_SDMA2: | |
991 | hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; | |
992 | break; | |
993 | ||
994 | default: | |
995 | return 0; | |
996 | } | |
997 | ||
998 | if (hdr == NULL) | |
999 | return 0; | |
1000 | ||
1001 | /* Only 12 bit in use*/ | |
1002 | return hdr->common.ucode_version; | |
1003 | } | |
1004 | ||
1005 | static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | |
1006 | uint32_t page_table_base) | |
1007 | { | |
1008 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
1009 | uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | | |
1010 | AMDGPU_PTE_VALID; | |
1011 | ||
1012 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { | |
1013 | pr_err("trying to set page table base for wrong VMID %u\n", | |
1014 | vmid); | |
1015 | return; | |
1016 | } | |
1017 | ||
1018 | /* TODO: take advantage of per-process address space size. For | |
1019 | * now, all processes share the same address space size, like | |
1020 | * on GFX8 and older. | |
1021 | */ | |
1022 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); | |
1023 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); | |
1024 | ||
1025 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), | |
1026 | lower_32_bits(adev->vm_manager.max_pfn - 1)); | |
1027 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), | |
1028 | upper_32_bits(adev->vm_manager.max_pfn - 1)); | |
1029 | ||
1030 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); | |
1031 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); | |
1032 | ||
1033 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); | |
1034 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); | |
1035 | ||
1036 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), | |
1037 | lower_32_bits(adev->vm_manager.max_pfn - 1)); | |
1038 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), | |
1039 | upper_32_bits(adev->vm_manager.max_pfn - 1)); | |
1040 | ||
1041 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); | |
1042 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); | |
1043 | } |