/*
 * Copyright 2014-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "kfd2kgd: " fmt

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "soc15_hw_ip.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "sdma0/sdma0_4_0_offset.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "oss/osssys_4_0_offset.h"
#include "oss/osssys_4_0_sh_mask.h"
#include "soc15_common.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "mmhub_v1_0.h"
#include "gfxhub_v1_0.h"


#define V9_PIPE_PER_MEC		(4)
#define V9_QUEUES_PER_PIPE_MEC	(8)

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t sh_mem_config,
		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
		uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
		unsigned int vmid);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
		uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
		uint8_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base);
static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

/* Because REG_GET_FIELD() is used, this function is kept in the
 * ASIC-specific file.
 */
static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
		struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;

	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	return 0;
}

static const struct kfd2kgd_calls kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			get_atc_vmid_pasid_mapping_valid,
	.set_scratch_backing_va = set_scratch_backing_va,
	.get_tile_config = amdgpu_amdkfd_get_tile_config,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.invalidate_tlbs = invalidate_tlbs,
	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
	.get_hive_id = amdgpu_amdkfd_get_hive_id,
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
{
	return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

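/*
 * lock_srbm()/unlock_srbm() bracket access to the SRBM-banked
 * (per-MEC/pipe/queue/VMID) register space: they take adev->srbm_mutex and
 * program the selection via soc15_grbm_select(), then restore the select
 * to 0/0/0/0 and drop the mutex when done.
 */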
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
			uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

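/*
 * get_queue_mask() returns the single-bit mask used for
 * CP_PQ_WPTR_POLL_CNTL1: bit (pipe_id * num_queue_per_pipe + queue_id)
 * modulo 32. For example (illustrative values), with 8 queues per pipe,
 * pipe 1 queue 2 maps to bit 10, i.e. a mask of 0x400.
 */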
static uint32_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
			    queue_id) & 31;

	return ((uint32_t)1) << bit;
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

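/*
 * Program the per-VMID shader memory configuration (SH_MEM_CONFIG and
 * SH_MEM_BASES) under the SRBM lock. The APE1 registers no longer exist
 * on GFX9, so the ape1 arguments are accepted but ignored.
 */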
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	/* APE1 no longer exists on GFX9 */

	unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	/*
	 * need to do this twice, once for gfx and once for mmhub
	 * for ATC add 16 to VMID for mmhub, for IH different registers.
	 * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
	 */

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << (vmid + 16))))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << (vmid + 16));

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
	       pasid_mapping);
	return 0;
}

/* TODO - RING0 form of field is obsolete, seems to date back to SI
 * but still works
 */

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

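/*
 * get_sdma_base_addr() computes the register offset of a specific SDMA RLC
 * queue: the SDMA0/SDMA1 register block base plus queue_id times the
 * per-queue register stride (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL).
 * For example (illustrative), engine 1 queue 2 yields base[1] + 2 * stride.
 */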
static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t base[2] = {
		SOC15_REG_OFFSET(SDMA0, 0,
				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
		SOC15_REG_OFFSET(SDMA1, 0,
				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
	};
	uint32_t retval;

	retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
			mmSDMA0_RLC0_RB_CNTL);

	pr_debug("sdma base address: 0x%x\n", retval);

	return retval;
}

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

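/*
 * kgd_hqd_load() restores a compute queue from its MQD: it selects the
 * queue via acquire_queue(), copies the HQD register image from the MQD
 * (CP_MQD_BASE_ADDR through CP_HQD_PQ_WPTR_HI), enables the doorbell,
 * optionally arms a one-shot CP poll of the user-mode write pointer,
 * kicks the EOP fetcher and finally sets CP_HQD_ACTIVE.
 */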
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HIQ is set during driver init period with vmid set to 0 */
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			mec, pipe, queue_id);
		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
	}

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);


	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

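		/*
		 * Illustrative walk-through of the arithmetic above (values
		 * are made up): with a QUEUE_SIZE field of 9, queue_size is
		 * 2 << 9 = 0x400. For a saved rptr of 0x3f0 and a saved
		 * wptr_lo of 0x12345400, the low bits of the wptr (0x000)
		 * are below 0x3f0, so one queue_size is added:
		 * 0x3f0 + 0x400 + 0x12345400 = 0x12345bf0.
		 */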
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uintptr_t)wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uintptr_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
		       get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(kgd);

	return 0;
}

static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

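/*
 * kgd_hqd_sdma_load() restores an SDMA RLC queue from its MQD: it disables
 * the ring buffer, waits (up to 2s) for the context to go idle, clears
 * RESUME_CTX, programs the doorbell and ring/read/write pointer registers
 * (taking the user write pointer if it can be read, otherwise falling back
 * to the saved read pointer), then re-enables the ring.
 */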
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
					    m->sdma_queue_id);
	sdmax_gfx_context_cntl = m->sdma_engine_id ?
		SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
		SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}
	data = RREG32(sdmax_gfx_context_cntl);
	data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
			     RESUME_CTX, 0);
	WREG32(sdmax_gfx_context_cntl, data);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_base_addr + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_base_addr + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_base_addr + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_base_addr + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
		    high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

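/*
 * kgd_hqd_destroy() preempts a compute queue: it issues a dequeue request
 * (DRAIN_PIPE or RESET_WAVES, depending on the requested preempt type) to
 * CP_HQD_DEQUEUE_REQUEST and then polls CP_HQD_ACTIVE until the queue
 * deactivates or the caller-supplied timeout (in milliseconds) expires.
 */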
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v9_mqd *m = get_mqd(mqd);

	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

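/*
 * invalidate_tlbs_with_kiq() performs a PASID-based TLB flush by submitting
 * a PACKET3_INVALIDATE_TLBS packet (plus a polling fence) on the KIQ ring
 * and waiting for the fence, returning -ETIME if the wait fails.
 */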
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
			uint32_t flush_type)
{
	signed long r;
	uint32_t seq;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;

	spin_lock(&adev->gfx.kiq.ring_lock);
	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package */
	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);

	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
	if (r < 1) {
		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
		return -ETIME;
	}

	return 0;
}

static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	int vmid;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	uint32_t flush_type = 0;

	if (adev->in_gpu_reset)
		return -EIO;
	if (adev->gmc.xgmi.num_physical_nodes &&
		adev->asic_type == CHIP_VEGA20)
		flush_type = 2;

	if (ring->sched.ready)
		return invalidate_tlbs_with_kiq(adev, pasid, flush_type);

	for (vmid = 0; vmid < 16; vmid++) {
		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
			continue;
		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
				== pasid) {
				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
							 flush_type);
				break;
			}
		}
	}

	return 0;
}

static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("non kfd vmid %d\n", vmid);
		return 0;
	}

	/* Use legacy mode tlb invalidation.
	 *
	 * Currently on Raven the code below is broken for anything but
	 * legacy mode due to a MMHUB power gating problem. A workaround
	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
	 * bit.
	 *
	 * TODO 1: agree on the right set of invalidation registers for
	 * KFD use. Use the last one for now. Invalidate both GC and
	 * MMHUB.
	 *
	 * TODO 2: support range-based invalidation, requires kfd2kgd
	 * interface change
	 */
	amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
	return 0;
}

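/*
 * kgd_address_watch_disable(), kgd_address_watch_execute() and
 * kgd_address_watch_get_offset() are stubs on GFX9: they accept the
 * kfd2kgd arguments but do nothing and always return 0.
 */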
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}

static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid)
{
	/* No longer needed on GFXv9. The scratch base address is
	 * passed to the shader by the CP. It's the user mode driver's
	 * responsibility.
	 */
}

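/*
 * set_vm_context_page_table_base() programs the page table base for a
 * KFD-owned VMID in both the MMHUB and the GFXHUB, after validating that
 * the VMID really belongs to KFD.
 */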
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	/* TODO: take advantage of per-process address space size. For
	 * now, all processes share the same address space size, like
	 * on GFX8 and older.
	 */
	mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);

	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}
d5a114a6 | 890 | } |