Commit | Line | Data |
---|---|---|
cc009e61 MJ |
1 | /* |
2 | * Copyright 2021 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | #include <linux/mmu_context.h> | |
23 | #include "amdgpu.h" | |
24 | #include "amdgpu_amdkfd.h" | |
25 | #include "gc/gc_11_0_0_offset.h" | |
26 | #include "gc/gc_11_0_0_sh_mask.h" | |
27 | #include "oss/osssys_6_0_0_offset.h" | |
28 | #include "oss/osssys_6_0_0_sh_mask.h" | |
29 | #include "soc15_common.h" | |
30 | #include "soc15d.h" | |
31 | #include "v11_structs.h" | |
32 | #include "soc21.h" | |
33 | ||
/*
 * Request codes for dequeuing a hardware queue. The selected value is
 * written to CP_HQD_DEQUEUE_REQUEST when a compute queue is destroyed.
 */
enum hqd_dequeue_request_type {
	NO_ACTION   = 0,
	DRAIN_PIPE  = 1,
	RESET_WAVES = 2,
	SAVE_WAVES  = 3
};
40 | ||
41 | static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, | |
42 | uint32_t queue, uint32_t vmid) | |
43 | { | |
44 | mutex_lock(&adev->srbm_mutex); | |
45 | soc21_grbm_select(adev, mec, pipe, queue, vmid); | |
46 | } | |
47 | ||
48 | static void unlock_srbm(struct amdgpu_device *adev) | |
49 | { | |
50 | soc21_grbm_select(adev, 0, 0, 0, 0); | |
51 | mutex_unlock(&adev->srbm_mutex); | |
52 | } | |
53 | ||
54 | static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, | |
55 | uint32_t queue_id) | |
56 | { | |
57 | uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | |
58 | uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | |
59 | ||
60 | lock_srbm(adev, mec, pipe, queue_id, 0); | |
61 | } | |
62 | ||
63 | static uint64_t get_queue_mask(struct amdgpu_device *adev, | |
64 | uint32_t pipe_id, uint32_t queue_id) | |
65 | { | |
66 | unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + | |
67 | queue_id; | |
68 | ||
69 | return 1ull << bit; | |
70 | } | |
71 | ||
/* Undo acquire_queue(): a thin wrapper around unlock_srbm(). */
static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}
76 | ||
77 | static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid, | |
78 | uint32_t sh_mem_config, | |
79 | uint32_t sh_mem_ape1_base, | |
80 | uint32_t sh_mem_ape1_limit, | |
81 | uint32_t sh_mem_bases) | |
82 | { | |
83 | lock_srbm(adev, 0, 0, 0, vmid); | |
84 | ||
85 | WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config); | |
86 | WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases); | |
87 | ||
88 | unlock_srbm(adev); | |
89 | } | |
90 | ||
91 | static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid, | |
92 | unsigned int vmid) | |
93 | { | |
94 | uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT; | |
95 | ||
96 | /* Mapping vmid to pasid also for IH block */ | |
97 | pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n", | |
98 | vmid, pasid); | |
99 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, value); | |
100 | ||
101 | return 0; | |
102 | } | |
103 | ||
104 | static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id) | |
105 | { | |
106 | uint32_t mec; | |
107 | uint32_t pipe; | |
108 | ||
109 | mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | |
110 | pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | |
111 | ||
112 | lock_srbm(adev, mec, pipe, 0, 0); | |
113 | ||
97a3d609 | 114 | WREG32_SOC15(GC, 0, regCPC_INT_CNTL, |
cc009e61 MJ |
115 | CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | |
116 | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); | |
117 | ||
118 | unlock_srbm(adev); | |
119 | ||
120 | return 0; | |
121 | } | |
122 | ||
123 | static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, | |
124 | unsigned int engine_id, | |
125 | unsigned int queue_id) | |
126 | { | |
127 | uint32_t sdma_engine_reg_base = 0; | |
128 | uint32_t sdma_rlc_reg_offset; | |
129 | ||
130 | switch (engine_id) { | |
131 | case 0: | |
132 | sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, | |
133 | regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL; | |
134 | break; | |
135 | case 1: | |
136 | sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, | |
137 | regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL; | |
138 | break; | |
139 | default: | |
140 | BUG(); | |
141 | } | |
142 | ||
143 | sdma_rlc_reg_offset = sdma_engine_reg_base | |
144 | + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL); | |
145 | ||
146 | pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, | |
147 | queue_id, sdma_rlc_reg_offset); | |
148 | ||
149 | return sdma_rlc_reg_offset; | |
150 | } | |
151 | ||
/* View an opaque MQD pointer as a v11 compute MQD; no checking is done. */
static inline struct v11_compute_mqd *get_mqd(void *mqd)
{
	struct v11_compute_mqd *compute_mqd = mqd;

	return compute_mqd;
}
156 | ||
/* View an opaque MQD pointer as a v11 SDMA MQD; no checking is done. */
static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
{
	struct v11_sdma_mqd *sdma_mqd = mqd;

	return sdma_mqd;
}
161 | ||
162 | static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, | |
163 | uint32_t queue_id, uint32_t __user *wptr, | |
164 | uint32_t wptr_shift, uint32_t wptr_mask, | |
165 | struct mm_struct *mm) | |
166 | { | |
167 | struct v11_compute_mqd *m; | |
168 | uint32_t *mqd_hqd; | |
169 | uint32_t reg, hqd_base, data; | |
170 | ||
171 | m = get_mqd(mqd); | |
172 | ||
173 | pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); | |
174 | acquire_queue(adev, pipe_id, queue_id); | |
175 | ||
176 | /* HIQ is set during driver init period with vmid set to 0*/ | |
177 | if (m->cp_hqd_vmid == 0) { | |
178 | uint32_t value, mec, pipe; | |
179 | ||
180 | mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | |
181 | pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | |
182 | ||
183 | pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", | |
184 | mec, pipe, queue_id); | |
185 | value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS)); | |
186 | value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, | |
187 | ((mec << 5) | (pipe << 3) | queue_id | 0x80)); | |
188 | WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value); | |
189 | } | |
190 | ||
191 | /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ | |
192 | mqd_hqd = &m->cp_mqd_base_addr_lo; | |
193 | hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR); | |
194 | ||
195 | for (reg = hqd_base; | |
196 | reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++) | |
197 | WREG32(reg, mqd_hqd[reg - hqd_base]); | |
198 | ||
199 | ||
200 | /* Activate doorbell logic before triggering WPTR poll. */ | |
201 | data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, | |
202 | CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); | |
203 | WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data); | |
204 | ||
205 | if (wptr) { | |
206 | /* Don't read wptr with get_user because the user | |
207 | * context may not be accessible (if this function | |
208 | * runs in a work queue). Instead trigger a one-shot | |
209 | * polling read from memory in the CP. This assumes | |
210 | * that wptr is GPU-accessible in the queue's VMID via | |
211 | * ATC or SVM. WPTR==RPTR before starting the poll so | |
212 | * the CP starts fetching new commands from the right | |
213 | * place. | |
214 | * | |
215 | * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit | |
216 | * tricky. Assume that the queue didn't overflow. The | |
217 | * number of valid bits in the 32-bit RPTR depends on | |
218 | * the queue size. The remaining bits are taken from | |
219 | * the saved 64-bit WPTR. If the WPTR wrapped, add the | |
220 | * queue size. | |
221 | */ | |
222 | uint32_t queue_size = | |
223 | 2 << REG_GET_FIELD(m->cp_hqd_pq_control, | |
224 | CP_HQD_PQ_CONTROL, QUEUE_SIZE); | |
225 | uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); | |
226 | ||
227 | if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) | |
228 | guessed_wptr += queue_size; | |
229 | guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); | |
230 | guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; | |
231 | ||
232 | WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO), | |
233 | lower_32_bits(guessed_wptr)); | |
234 | WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI), | |
235 | upper_32_bits(guessed_wptr)); | |
236 | WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), | |
237 | lower_32_bits((uint64_t)wptr)); | |
238 | WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), | |
239 | upper_32_bits((uint64_t)wptr)); | |
240 | pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, | |
241 | (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); | |
242 | WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1), | |
243 | (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); | |
244 | } | |
245 | ||
246 | /* Start the EOP fetcher */ | |
247 | WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR), | |
248 | REG_SET_FIELD(m->cp_hqd_eop_rptr, | |
249 | CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); | |
250 | ||
251 | data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); | |
252 | WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data); | |
253 | ||
254 | release_queue(adev); | |
255 | ||
256 | return 0; | |
257 | } | |
258 | ||
259 | static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd, | |
260 | uint32_t pipe_id, uint32_t queue_id, | |
261 | uint32_t doorbell_off) | |
262 | { | |
263 | struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; | |
264 | struct v11_compute_mqd *m; | |
265 | uint32_t mec, pipe; | |
266 | int r; | |
267 | ||
268 | m = get_mqd(mqd); | |
269 | ||
270 | acquire_queue(adev, pipe_id, queue_id); | |
271 | ||
272 | mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | |
273 | pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | |
274 | ||
275 | pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", | |
276 | mec, pipe, queue_id); | |
277 | ||
278 | spin_lock(&adev->gfx.kiq.ring_lock); | |
279 | r = amdgpu_ring_alloc(kiq_ring, 7); | |
280 | if (r) { | |
281 | pr_err("Failed to alloc KIQ (%d).\n", r); | |
282 | goto out_unlock; | |
283 | } | |
284 | ||
285 | amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); | |
286 | amdgpu_ring_write(kiq_ring, | |
287 | PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ | |
288 | PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ | |
289 | PACKET3_MAP_QUEUES_QUEUE(queue_id) | | |
290 | PACKET3_MAP_QUEUES_PIPE(pipe) | | |
291 | PACKET3_MAP_QUEUES_ME((mec - 1)) | | |
292 | PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ | |
293 | PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ | |
294 | PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ | |
295 | PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ | |
296 | amdgpu_ring_write(kiq_ring, | |
297 | PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); | |
298 | amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); | |
299 | amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); | |
300 | amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); | |
301 | amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); | |
302 | amdgpu_ring_commit(kiq_ring); | |
303 | ||
304 | out_unlock: | |
305 | spin_unlock(&adev->gfx.kiq.ring_lock); | |
306 | release_queue(adev); | |
307 | ||
308 | return r; | |
309 | } | |
310 | ||
311 | static int hqd_dump_v11(struct amdgpu_device *adev, | |
312 | uint32_t pipe_id, uint32_t queue_id, | |
313 | uint32_t (**dump)[2], uint32_t *n_regs) | |
314 | { | |
315 | uint32_t i = 0, reg; | |
316 | #define HQD_N_REGS 56 | |
317 | #define DUMP_REG(addr) do { \ | |
318 | if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ | |
319 | break; \ | |
320 | (*dump)[i][0] = (addr) << 2; \ | |
321 | (*dump)[i++][1] = RREG32(addr); \ | |
322 | } while (0) | |
323 | ||
324 | *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); | |
325 | if (*dump == NULL) | |
326 | return -ENOMEM; | |
327 | ||
328 | acquire_queue(adev, pipe_id, queue_id); | |
329 | ||
330 | for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR); | |
331 | reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++) | |
332 | DUMP_REG(reg); | |
333 | ||
334 | release_queue(adev); | |
335 | ||
336 | WARN_ON_ONCE(i != HQD_N_REGS); | |
337 | *n_regs = i; | |
338 | ||
339 | return 0; | |
340 | } | |
341 | ||
342 | static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd, | |
343 | uint32_t __user *wptr, struct mm_struct *mm) | |
344 | { | |
345 | struct v11_sdma_mqd *m; | |
346 | uint32_t sdma_rlc_reg_offset; | |
347 | unsigned long end_jiffies; | |
348 | uint32_t data; | |
349 | uint64_t data64; | |
350 | uint64_t __user *wptr64 = (uint64_t __user *)wptr; | |
351 | ||
352 | m = get_sdma_mqd(mqd); | |
353 | sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, | |
354 | m->sdma_queue_id); | |
355 | ||
356 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, | |
357 | m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK)); | |
358 | ||
359 | end_jiffies = msecs_to_jiffies(2000) + jiffies; | |
360 | while (true) { | |
361 | data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS); | |
362 | if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK) | |
363 | break; | |
364 | if (time_after(jiffies, end_jiffies)) { | |
365 | pr_err("SDMA RLC not idle in %s\n", __func__); | |
366 | return -ETIME; | |
367 | } | |
368 | usleep_range(500, 1000); | |
369 | } | |
370 | ||
371 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET, | |
372 | m->sdmax_rlcx_doorbell_offset); | |
373 | ||
374 | data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL, | |
375 | ENABLE, 1); | |
376 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data); | |
377 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR, | |
378 | m->sdmax_rlcx_rb_rptr); | |
379 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI, | |
380 | m->sdmax_rlcx_rb_rptr_hi); | |
381 | ||
382 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1); | |
383 | if (read_user_wptr(mm, wptr64, data64)) { | |
384 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR, | |
385 | lower_32_bits(data64)); | |
386 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI, | |
387 | upper_32_bits(data64)); | |
388 | } else { | |
389 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR, | |
390 | m->sdmax_rlcx_rb_rptr); | |
391 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI, | |
392 | m->sdmax_rlcx_rb_rptr_hi); | |
393 | } | |
394 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0); | |
395 | ||
396 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base); | |
397 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI, | |
398 | m->sdmax_rlcx_rb_base_hi); | |
399 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO, | |
400 | m->sdmax_rlcx_rb_rptr_addr_lo); | |
401 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI, | |
402 | m->sdmax_rlcx_rb_rptr_addr_hi); | |
403 | ||
404 | data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL, | |
405 | RB_ENABLE, 1); | |
406 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data); | |
407 | ||
408 | return 0; | |
409 | } | |
410 | ||
411 | static int hqd_sdma_dump_v11(struct amdgpu_device *adev, | |
412 | uint32_t engine_id, uint32_t queue_id, | |
413 | uint32_t (**dump)[2], uint32_t *n_regs) | |
414 | { | |
415 | uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, | |
416 | engine_id, queue_id); | |
417 | uint32_t i = 0, reg; | |
418 | #undef HQD_N_REGS | |
419 | #define HQD_N_REGS (7+11+1+12+12) | |
420 | ||
421 | *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); | |
422 | if (*dump == NULL) | |
423 | return -ENOMEM; | |
424 | ||
425 | for (reg = regSDMA0_QUEUE0_RB_CNTL; | |
426 | reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++) | |
427 | DUMP_REG(sdma_rlc_reg_offset + reg); | |
428 | for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI; | |
429 | reg <= regSDMA0_QUEUE0_DOORBELL; reg++) | |
430 | DUMP_REG(sdma_rlc_reg_offset + reg); | |
431 | for (reg = regSDMA0_QUEUE0_DOORBELL_LOG; | |
432 | reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++) | |
433 | DUMP_REG(sdma_rlc_reg_offset + reg); | |
434 | for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET; | |
435 | reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++) | |
436 | DUMP_REG(sdma_rlc_reg_offset + reg); | |
437 | for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0; | |
438 | reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++) | |
439 | DUMP_REG(sdma_rlc_reg_offset + reg); | |
440 | ||
441 | WARN_ON_ONCE(i != HQD_N_REGS); | |
442 | *n_regs = i; | |
443 | ||
444 | return 0; | |
445 | } | |
446 | ||
447 | static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address, | |
448 | uint32_t pipe_id, uint32_t queue_id) | |
449 | { | |
450 | uint32_t act; | |
451 | bool retval = false; | |
452 | uint32_t low, high; | |
453 | ||
454 | acquire_queue(adev, pipe_id, queue_id); | |
455 | act = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE)); | |
456 | if (act) { | |
457 | low = lower_32_bits(queue_address >> 8); | |
458 | high = upper_32_bits(queue_address >> 8); | |
459 | ||
460 | if (low == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) && | |
461 | high == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI))) | |
462 | retval = true; | |
463 | } | |
464 | release_queue(adev); | |
465 | return retval; | |
466 | } | |
467 | ||
468 | static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd) | |
469 | { | |
470 | struct v11_sdma_mqd *m; | |
471 | uint32_t sdma_rlc_reg_offset; | |
472 | uint32_t sdma_rlc_rb_cntl; | |
473 | ||
474 | m = get_sdma_mqd(mqd); | |
475 | sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, | |
476 | m->sdma_queue_id); | |
477 | ||
478 | sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL); | |
479 | ||
480 | if (sdma_rlc_rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK) | |
481 | return true; | |
482 | ||
483 | return false; | |
484 | } | |
485 | ||
486 | static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd, | |
487 | enum kfd_preempt_type reset_type, | |
488 | unsigned int utimeout, uint32_t pipe_id, | |
489 | uint32_t queue_id) | |
490 | { | |
491 | enum hqd_dequeue_request_type type; | |
492 | unsigned long end_jiffies; | |
493 | uint32_t temp; | |
494 | struct v11_compute_mqd *m = get_mqd(mqd); | |
495 | ||
496 | acquire_queue(adev, pipe_id, queue_id); | |
497 | ||
498 | if (m->cp_hqd_vmid == 0) | |
499 | WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); | |
500 | ||
501 | switch (reset_type) { | |
502 | case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: | |
503 | type = DRAIN_PIPE; | |
504 | break; | |
505 | case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: | |
506 | type = RESET_WAVES; | |
507 | break; | |
508 | default: | |
509 | type = DRAIN_PIPE; | |
510 | break; | |
511 | } | |
512 | ||
513 | WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type); | |
514 | ||
515 | end_jiffies = (utimeout * HZ / 1000) + jiffies; | |
516 | while (true) { | |
517 | temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE)); | |
518 | if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) | |
519 | break; | |
520 | if (time_after(jiffies, end_jiffies)) { | |
521 | pr_err("cp queue pipe %d queue %d preemption failed\n", | |
522 | pipe_id, queue_id); | |
523 | release_queue(adev); | |
524 | return -ETIME; | |
525 | } | |
526 | usleep_range(500, 1000); | |
527 | } | |
528 | ||
529 | release_queue(adev); | |
530 | return 0; | |
531 | } | |
532 | ||
533 | static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd, | |
534 | unsigned int utimeout) | |
535 | { | |
536 | struct v11_sdma_mqd *m; | |
537 | uint32_t sdma_rlc_reg_offset; | |
538 | uint32_t temp; | |
539 | unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; | |
540 | ||
541 | m = get_sdma_mqd(mqd); | |
542 | sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, | |
543 | m->sdma_queue_id); | |
544 | ||
545 | temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL); | |
546 | temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK; | |
547 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp); | |
548 | ||
549 | while (true) { | |
550 | temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS); | |
551 | if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK) | |
552 | break; | |
553 | if (time_after(jiffies, end_jiffies)) { | |
554 | pr_err("SDMA RLC not idle in %s\n", __func__); | |
555 | return -ETIME; | |
556 | } | |
557 | usleep_range(500, 1000); | |
558 | } | |
559 | ||
560 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0); | |
561 | WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, | |
562 | RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) | | |
563 | SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK); | |
564 | ||
565 | m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR); | |
566 | m->sdmax_rlcx_rb_rptr_hi = | |
567 | RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI); | |
568 | ||
569 | return 0; | |
570 | } | |
571 | ||
572 | static int wave_control_execute_v11(struct amdgpu_device *adev, | |
573 | uint32_t gfx_index_val, | |
574 | uint32_t sq_cmd) | |
575 | { | |
576 | uint32_t data = 0; | |
577 | ||
578 | mutex_lock(&adev->grbm_idx_mutex); | |
579 | ||
580 | WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val); | |
581 | WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd); | |
582 | ||
583 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, | |
584 | INSTANCE_BROADCAST_WRITES, 1); | |
585 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, | |
586 | SA_BROADCAST_WRITES, 1); | |
587 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, | |
588 | SE_BROADCAST_WRITES, 1); | |
589 | ||
590 | WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data); | |
591 | mutex_unlock(&adev->grbm_idx_mutex); | |
592 | ||
593 | return 0; | |
594 | } | |
595 | ||
596 | static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev, | |
597 | uint32_t vmid, uint64_t page_table_base) | |
598 | { | |
599 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { | |
600 | pr_err("trying to set page table base for wrong VMID %u\n", | |
601 | vmid); | |
602 | return; | |
603 | } | |
604 | ||
605 | /* SDMA is on gfxhub as well for gfx11 adapters */ | |
606 | adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); | |
607 | } | |
608 | ||
609 | const struct kfd2kgd_calls gfx_v11_kfd2kgd = { | |
610 | .program_sh_mem_settings = program_sh_mem_settings_v11, | |
611 | .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11, | |
612 | .init_interrupts = init_interrupts_v11, | |
613 | .hqd_load = hqd_load_v11, | |
614 | .hiq_mqd_load = hiq_mqd_load_v11, | |
615 | .hqd_sdma_load = hqd_sdma_load_v11, | |
616 | .hqd_dump = hqd_dump_v11, | |
617 | .hqd_sdma_dump = hqd_sdma_dump_v11, | |
618 | .hqd_is_occupied = hqd_is_occupied_v11, | |
619 | .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11, | |
620 | .hqd_destroy = hqd_destroy_v11, | |
621 | .hqd_sdma_destroy = hqd_sdma_destroy_v11, | |
622 | .wave_control_execute = wave_control_execute_v11, | |
623 | .get_atc_vmid_pasid_mapping_info = NULL, | |
624 | .set_vm_context_page_table_base = set_vm_context_page_table_base_v11, | |
625 | }; |