/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gfx_v8_0.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "oss/oss_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
#include "gmc/gmc_8_1_d.h"
#include "vi_structs.h"
#include "vid.h"

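/* Values written to CP_HQD_DEQUEUE_REQUEST by kgd_hqd_destroy() below to
 * tell the CP how to take an HQD off the hardware: do nothing, drain the
 * pipe, or reset the waves.
 */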
enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t sh_mem_config,
		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
		uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
		unsigned int vmid);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
		uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
		enum kfd_preempt_type reset_type,
		unsigned int utimeout, uint32_t pipe_id,
		uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
		unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
		uint8_t vmid);
static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

/* Because REG_GET_FIELD() is used, this function lives in the
 * ASIC-specific file.
 */
static int get_tile_config(struct kgd_dev *kgd,
		struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;
	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	return 0;
}

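/* Table of GFXv8 callbacks handed to the KFD driver through
 * amdgpu_amdkfd_gfx_8_0_get_functions() below.
 */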
static const struct kfd2kgd_calls kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			get_atc_vmid_pasid_mapping_valid,
	.set_scratch_backing_va = set_scratch_backing_va,
	.get_tile_config = get_tile_config,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.invalidate_tlbs = invalidate_tlbs,
	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
	return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

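/* SRBM register banking: lock_srbm() takes srbm_mutex and programs
 * SRBM_GFX_CNTL so that subsequent reads/writes of banked CP/SH registers
 * target the selected MEC/pipe/queue/VMID instance; unlock_srbm() resets
 * SRBM_GFX_CNTL to 0 and drops the mutex.
 */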
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&adev->srbm_mutex);
	WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	WREG32(mmSRBM_GFX_CNTL, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

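/* Program the per-VMID shader memory aperture registers (SH_MEM_*) while the
 * SRBM is banked to the given VMID.
 */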
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	WREG32(mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
		cpu_relax();
	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

	return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

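/* SDMA RLC queue registers sit at a fixed stride in register space:
 * SDMA1_REGISTER_OFFSET per engine and KFD_VI_SDMA_QUEUE_OFFSET per queue.
 * The value returned here is the base that all mmSDMA0_RLC0_* accesses
 * below are offset from.
 */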
static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
{
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
	pr_debug("kfd: sdma base address: 0x%x\n", retval);

	return retval;
}

static inline struct vi_mqd *get_mqd(void *mqd)
{
	return (struct vi_mqd *)mqd;
}

static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct vi_sdma_mqd *)mqd;
}

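/* Load a compute HQD from its MQD: route the HIQ in RLC_CP_SCHEDULERS when
 * needed, copy the MQD register image into the HQD, enable the doorbell,
 * restore the user-space write pointer and finally set CP_HQD_ACTIVE.
 */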
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, wptr_val, data;
	bool valid_wptr = false;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HIQ is set during driver init period with vmid set to 0 */
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			mec, pipe, queue_id);
		value = RREG32(mmRLC_CP_SCHEDULERS);
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32(mmRLC_CP_SCHEDULERS, value);
	}

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
	}

	for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Copy userspace write pointer value to register.
	 * Activate doorbell logic to monitor subsequent changes.
	 */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* read_user_wptr() may take mm->mmap_sem.
	 * Release srbm_mutex to avoid a circular dependency between
	 * srbm_mutex->mmap_sem->reservation_ww_class_mutex->srbm_mutex.
	 */
	release_queue(kgd);
	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
	acquire_queue(kgd, pipe_id, queue_id);
	if (valid_wptr)
		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(mmCP_HQD_ACTIVE, data);

	release_queue(kgd);

	return 0;
}

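/* Snapshot the HQD registers of one queue into a kmalloc'ed array of
 * {byte offset, value} pairs; freeing the array is presumably left to the
 * caller on the KFD side.
 */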
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

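/* Load an SDMA RLC queue from its MQD: disable the ring buffer, wait for the
 * context to report idle, clear RESUME_CTX on the owning engine, enable the
 * doorbell, restore the ring pointers and base, then re-enable the ring.
 */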
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	unsigned long end_jiffies;
	uint32_t sdma_base_addr;
	uint32_t data;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}
	if (m->sdma_engine_id) {
		data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
		data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
				RESUME_CTX, 0);
		WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
	} else {
		data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
		data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
				RESUME_CTX, 0);
		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
	}

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);

	if (read_user_wptr(mm, wptr, data))
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
	else
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
				m->sdmax_rlcx_virtual_addr);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
		queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
	     reg++)
		DUMP_REG(sdma_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
				high == RREG32(mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

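/* Preempt ("destroy") an HQD: pick a dequeue request type from the preempt
 * type, apply the IQ timer workaround described below, issue the dequeue
 * request and poll CP_HQD_ACTIVE until the queue goes inactive or the
 * timeout expires.
 */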
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
			enum kfd_preempt_type reset_type,
			unsigned int utimeout, uint32_t pipe_id,
			uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t temp;
	enum hqd_dequeue_request_type type;
	unsigned long flags, end_jiffies;
	int retry;
	struct vi_mqd *m = get_mqd(mqd);

	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	/* Workaround: If IQ timer is active and the wait time is close to or
	 * equal to 0, dequeueing is not safe. Wait until either the wait time
	 * is larger or the timer is cleared. Also ensure that IQ_REQ_PEND is
	 * cleared before continuing and that wait times are set to at least
	 * 0x3.
	 */
	local_irq_save(flags);
	preempt_disable();
	retry = 5000; /* wait for 500 usecs at maximum */
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3) /* SEM-rearm is safe */
				break;
			/* Wait time 3 is safe for CP, but our MMIO read/write
			 * time is close to 1 microsecond, so check for 10 to
			 * leave more buffer room
			 */
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();

	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

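/* Send an SQ_CMD wave-control command to the shader engines/CUs selected by
 * gfx_index_val, then restore GRBM_GFX_INDEX to broadcast mode, all under
 * grbm_idx_mutex.
 */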
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(mmSQ_CMD, sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32(mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}

static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	lock_srbm(kgd, 0, 0, 0, vmid);
	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
	unlock_srbm(kgd);
}

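/* On GFXv8 the KFD-owned VMIDs map onto the VM_CONTEXT8..15 page table
 * registers, hence the "vmid - 8" offset below; only the lower 32 bits of
 * the page table base are programmed here.
 */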
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID\n");
		return;
	}
	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
			lower_32_bits(page_table_base));
}

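/* Flush the GPU TLB for the VMID currently mapped to @pasid: scan the ATC
 * VMID/PASID mapping registers for a valid entry with a matching PASID and
 * kick VM_INVALIDATE_REQUEST for that VMID. The VM_INVALIDATE_RESPONSE read
 * appears to serve only to post the write before returning.
 */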
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	int vmid;
	unsigned int tmp;

	if (adev->in_gpu_reset)
		return -EIO;

	for (vmid = 0; vmid < 16; vmid++) {
		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
			continue;

		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
			RREG32(mmVM_INVALIDATE_RESPONSE);
			break;
		}
	}

	return 0;
}

static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("non kfd vmid %d\n", vmid);
		return -EINVAL;
	}

	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
	RREG32(mmVM_INVALIDATE_RESPONSE);
	return 0;
}