Commit | Line | Data |
---|---|---|
d5a114a6 FK |
1 | /* |
2 | * Copyright 2014-2018 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
d5a114a6 FK |
22 | #include "amdgpu.h" |
23 | #include "amdgpu_amdkfd.h" | |
d5a114a6 FK |
24 | #include "gc/gc_9_0_offset.h" |
25 | #include "gc/gc_9_0_sh_mask.h" | |
26 | #include "vega10_enum.h" | |
27 | #include "sdma0/sdma0_4_0_offset.h" | |
28 | #include "sdma0/sdma0_4_0_sh_mask.h" | |
29 | #include "sdma1/sdma1_4_0_offset.h" | |
30 | #include "sdma1/sdma1_4_0_sh_mask.h" | |
31 | #include "athub/athub_1_0_offset.h" | |
32 | #include "athub/athub_1_0_sh_mask.h" | |
33 | #include "oss/osssys_4_0_offset.h" | |
34 | #include "oss/osssys_4_0_sh_mask.h" | |
35 | #include "soc15_common.h" | |
36 | #include "v9_structs.h" | |
37 | #include "soc15.h" | |
38 | #include "soc15d.h" | |
e4312d45 AD |
39 | #include "mmhub_v1_0.h" |
40 | #include "gfxhub_v1_0.h" | |
d5a114a6 | 41 | |
d5a114a6 | 42 | |
d5a114a6 FK |
/* Dequeue request codes written to mmCP_HQD_DEQUEUE_REQUEST when a compute
 * queue is preempted (see kgd_gfx_v9_hqd_destroy).
 */
enum hqd_dequeue_request_type {
	NO_ACTION = 0,	/* do not request a dequeue */
	DRAIN_PIPE,	/* drain the pipe before dequeuing */
	RESET_WAVES	/* reset outstanding waves */
};
48 | ||
d5a114a6 FK |
/* The KGD interface passes an opaque struct kgd_dev handle; it is in fact
 * the amdgpu device, so recover it with a plain cast.
 */
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev;
}
53 | ||
/* Take srbm_mutex and select the given (mec, pipe, queue, vmid) as the
 * SRBM register-access target.  Must be paired with unlock_srbm().
 */
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, mec, pipe, queue, vmid);
}
62 | ||
/* Restore the default SRBM selection (all zeros) and drop srbm_mutex.
 * Counterpart of lock_srbm().
 */
static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
70 | ||
71 | static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, | |
72 | uint32_t queue_id) | |
73 | { | |
74 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
75 | ||
76 | uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | |
77 | uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | |
78 | ||
79 | lock_srbm(kgd, mec, pipe, queue_id, 0); | |
80 | } | |
81 | ||
35cd89d5 | 82 | static uint64_t get_queue_mask(struct amdgpu_device *adev, |
d5a114a6 FK |
83 | uint32_t pipe_id, uint32_t queue_id) |
84 | { | |
35cd89d5 AL |
85 | unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + |
86 | queue_id; | |
d5a114a6 | 87 | |
35cd89d5 | 88 | return 1ull << bit; |
d5a114a6 FK |
89 | } |
90 | ||
/* Release the SRBM queue selection taken by acquire_queue(). */
static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}
95 | ||
/* Program the per-VMID shader memory configuration (SH_MEM_CONFIG and
 * SH_MEM_BASES).  The APE1 parameters are accepted for interface
 * compatibility but ignored: APE1 no longer exists on GFX9.
 */
void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/* Select the target VMID while holding srbm_mutex */
	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	/* APE1 no longer exists on GFX9 */

	unlock_srbm(kgd);
}
112 | ||
/* Establish the VMID -> PASID mapping in the ATC and IH blocks.
 *
 * The mapping is written twice: once in the VMID0..15 register range (gfx)
 * and once in the VMID16..31 range (mmhub).  Each write is confirmed by
 * polling the corresponding UPDATE_STATUS bit, which is then cleared by
 * writing it back.  Returns 0 (the polling loops only exit on success).
 */
int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	/*
	 * need to do this twice, once for gfx and once for mmhub
	 * for ATC add 16 to VMID for mmhub, for IH different registers.
	 * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
	 */

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

	/* Wait for the ATC to acknowledge the gfx-range mapping update */
	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	/* Write-1-to-clear the status bit for the next update */
	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
	       pasid_mapping);

	/* Wait for the ATC to acknowledge the mmhub-range (VMID+16) update */
	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << (vmid + 16))))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << (vmid + 16));

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
	       pasid_mapping);
	return 0;
}
169 | ||
170 | /* TODO - RING0 form of field is obsolete, seems to date back to SI | |
171 | * but still works | |
172 | */ | |
173 | ||
/* Enable time-stamp and opcode-error interrupts on the given compute pipe.
 * pipe_id is flat across MECs; it is decomposed into (mec, pipe) the same
 * way as in acquire_queue().  Always returns 0.
 */
int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}
193 | ||
b55a8b8b | 194 | static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, |
d5a114a6 FK |
195 | unsigned int engine_id, |
196 | unsigned int queue_id) | |
197 | { | |
b55a8b8b | 198 | uint32_t sdma_engine_reg_base[2] = { |
d5a114a6 FK |
199 | SOC15_REG_OFFSET(SDMA0, 0, |
200 | mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, | |
201 | SOC15_REG_OFFSET(SDMA1, 0, | |
202 | mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL | |
203 | }; | |
b55a8b8b YZ |
204 | uint32_t retval = sdma_engine_reg_base[engine_id] |
205 | + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); | |
d5a114a6 | 206 | |
b55a8b8b YZ |
207 | pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, |
208 | queue_id, retval); | |
d5a114a6 FK |
209 | |
210 | return retval; | |
211 | } | |
212 | ||
/* View a raw MQD buffer as a GFX9 compute MQD. */
static inline struct v9_mqd *get_mqd(void *mqd)
{
	struct v9_mqd *m = mqd;

	return m;
}
217 | ||
/* View a raw MQD buffer as a GFX9 SDMA MQD. */
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	struct v9_sdma_mqd *m = mqd;

	return m;
}
222 | ||
/* Load a compute MQD into the HQD registers and activate the queue.
 *
 * The MQD image is copied register-by-register from CP_MQD_BASE_ADDR
 * through CP_HQD_PQ_WPTR_HI, the doorbell is enabled, and — if a user
 * write pointer was supplied — a one-shot CP-side WPTR poll is armed
 * before the queue is marked active.  wptr_shift/wptr_mask are unused on
 * GFX9 (the CP polls the 64-bit wptr directly).  Always returns 0.
 */
int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);


	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uintptr_t)wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uintptr_t)wptr));
		/* Restrict the poll to this queue's bit in the pipe bitmap */
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(kgd);

	return 0;
}
302 | ||
/* Map the HIQ by submitting a MAP_QUEUES packet through the KIQ ring,
 * rather than programming the HQD registers directly.
 *
 * Returns 0 on success or the negative error from amdgpu_ring_alloc()
 * if KIQ ring space could not be reserved.
 */
int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v9_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	/* MAP_QUEUES is a 7-dword packet (header + 6 payload dwords) */
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(kgd);

	return r;
}
355 | ||
/* Snapshot the HQD register range (CP_MQD_BASE_ADDR..CP_HQD_PQ_WPTR_HI)
 * for debugging.  Allocates *dump as an array of (offset, value) pairs
 * (byte offsets, i.e. dword offset << 2); caller frees it.  *n_regs is set
 * to the number of pairs written.  Returns 0 or -ENOMEM.
 *
 * NOTE: DUMP_REG is also used by kgd_hqd_sdma_dump() below.
 */
int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
387 | ||
/* Load an SDMA MQD into the RLC queue registers and enable the ring
 * buffer.
 *
 * The queue is first disabled and the code waits (up to 2s) for the RLC
 * context to go idle before reprogramming it.  The 64-bit user wptr is
 * read with read_user_wptr(); if the user context is inaccessible, the
 * saved rptr is used instead so WPTR==RPTR (empty queue).
 * Returns 0 on success or -ETIME if the engine never went idle.
 */
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	/* Disable the ring buffer while reprogramming the queue */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	/* Suspend automatic wptr updates while writing WPTR directly */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		/* Fall back to WPTR==RPTR: queue starts empty */
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	/* Re-enable the ring buffer last, once everything is programmed */
	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}
457 | ||
/* Snapshot the SDMA RLC register ranges for debugging, in the same
 * (byte-offset, value) pair format as kgd_gfx_v9_hqd_dump(), whose
 * DUMP_REG macro is reused here.  HQD_N_REGS is redefined to the total
 * count of the four dumped ranges.  Returns 0 or -ENOMEM.
 */
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
489 | ||
/* Check whether the HQD at (pipe_id, queue_id) is active AND is running
 * the queue whose ring buffer starts at @queue_address.  The PQ base
 * registers hold the address shifted right by 8, hence the >> 8 before
 * comparing.
 */
bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
		    high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}
511 | ||
512 | static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) | |
513 | { | |
514 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
515 | struct v9_sdma_mqd *m; | |
b55a8b8b | 516 | uint32_t sdma_rlc_reg_offset; |
d5a114a6 FK |
517 | uint32_t sdma_rlc_rb_cntl; |
518 | ||
519 | m = get_sdma_mqd(mqd); | |
b55a8b8b | 520 | sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, |
d5a114a6 FK |
521 | m->sdma_queue_id); |
522 | ||
b55a8b8b | 523 | sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); |
d5a114a6 FK |
524 | |
525 | if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) | |
526 | return true; | |
527 | ||
528 | return false; | |
529 | } | |
530 | ||
/* Preempt and deactivate the HQD at (pipe_id, queue_id).
 *
 * Writes the requested dequeue type to CP_HQD_DEQUEUE_REQUEST and polls
 * CP_HQD_ACTIVE until the queue deactivates or @utimeout (ms) expires.
 * Returns 0 on success, -ETIME on preemption timeout, or -EIO if a GPU
 * reset is in progress (register access would be unreliable).
 */
int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v9_mqd *m = get_mqd(mqd);

	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	/* Translate the KFD preemption type into a CP dequeue request */
	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}
580 | ||
/* Stop an SDMA queue: disable the ring buffer, wait (up to @utimeout ms)
 * for the RLC context to go idle, then disable the doorbell and save the
 * final read pointer back into the MQD so the queue can be restored later.
 * Returns 0 on success or -ETIME if the engine never went idle.
 */
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	/* Preserve the final RPTR in the MQD for a later reload */
	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}
620 | ||
56fc40ab YZ |
621 | bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, |
622 | uint8_t vmid, uint16_t *p_pasid) | |
d5a114a6 | 623 | { |
56fc40ab | 624 | uint32_t value; |
d5a114a6 FK |
625 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
626 | ||
56fc40ab | 627 | value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) |
d5a114a6 | 628 | + vmid); |
56fc40ab | 629 | *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; |
d5a114a6 | 630 | |
56fc40ab | 631 | return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); |
d5a114a6 FK |
632 | } |
633 | ||
/* Address-watch is not implemented for GFX9; report success. */
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}
638 | ||
3e205a08 | 639 | int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, |
d5a114a6 FK |
640 | unsigned int watch_point_id, |
641 | uint32_t cntl_val, | |
642 | uint32_t addr_hi, | |
643 | uint32_t addr_lo) | |
644 | { | |
645 | return 0; | |
646 | } | |
647 | ||
/* Issue an SQ command to the waves selected by @gfx_index_val.
 *
 * GRBM_GFX_INDEX is narrowed to the caller's selection, the command is
 * written to SQ_CMD, and the index is restored to broadcast-to-all before
 * releasing grbm_idx_mutex.  Always returns 0.
 */
int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	/* Restore broadcast to all instances/SHs/SEs */
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
672 | ||
3e205a08 | 673 | uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, |
d5a114a6 FK |
674 | unsigned int watch_point_id, |
675 | unsigned int reg_offset) | |
676 | { | |
677 | return 0; | |
678 | } | |
679 | ||
/* Program the page-table base address for a KFD-owned VMID in both the
 * MMHUB and GFXHUB VM contexts.  Rejects (with an error message) any VMID
 * not allocated to KFD.
 */
static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
			uint32_t vmid, uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);

	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}
3e205a08 | 695 | |
/* KFD -> KGD function table for GFX9 ASICs. */
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_gfx_v9_address_watch_disable,
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.get_hive_id = amdgpu_amdkfd_get_hive_id,
	.get_unique_id = amdgpu_amdkfd_get_unique_id,
};