Commit | Line | Data |
---|---|---|
1c2014da TY |
1 | /* |
2 | * Copyright 2021 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | * | |
22 | */ | |
23 | #include <linux/firmware.h> | |
24 | #include <linux/pci.h> | |
25 | #include "amdgpu.h" | |
26 | #include "amdgpu_atomfirmware.h" | |
27 | #include "gmc_v11_0.h" | |
28 | #include "umc_v8_7.h" | |
29 | #include "athub/athub_3_0_0_sh_mask.h" | |
30 | #include "athub/athub_3_0_0_offset.h" | |
31 | #include "oss/osssys_6_0_0_offset.h" | |
32 | #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" | |
33 | #include "navi10_enum.h" | |
34 | #include "soc15.h" | |
35 | #include "soc15d.h" | |
36 | #include "soc15_common.h" | |
37 | #include "nbio_v4_3.h" | |
38 | #include "gfxhub_v3_0.h" | |
39 | #include "mmhub_v3_0.h" | |
f40fc191 | 40 | #include "mmhub_v3_0_2.h" |
1c2014da TY |
41 | #include "athub_v3_0.h" |
42 | ||
43 | ||
44 | static int gmc_v11_0_ecc_interrupt_state(struct amdgpu_device *adev, | |
45 | struct amdgpu_irq_src *src, | |
46 | unsigned type, | |
47 | enum amdgpu_interrupt_state state) | |
48 | { | |
49 | return 0; | |
50 | } | |
51 | ||
52 | static int | |
53 | gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, | |
54 | struct amdgpu_irq_src *src, unsigned type, | |
55 | enum amdgpu_interrupt_state state) | |
56 | { | |
57 | switch (state) { | |
58 | case AMDGPU_IRQ_STATE_DISABLE: | |
59 | /* MM HUB */ | |
60 | amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false); | |
61 | /* GFX HUB */ | |
62 | amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false); | |
63 | break; | |
64 | case AMDGPU_IRQ_STATE_ENABLE: | |
65 | /* MM HUB */ | |
66 | amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true); | |
67 | /* GFX HUB */ | |
68 | amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true); | |
69 | break; | |
70 | default: | |
71 | break; | |
72 | } | |
73 | ||
74 | return 0; | |
75 | } | |
76 | ||
77 | static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, | |
78 | struct amdgpu_irq_src *source, | |
79 | struct amdgpu_iv_entry *entry) | |
80 | { | |
81 | struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; | |
82 | uint32_t status = 0; | |
83 | u64 addr; | |
84 | ||
85 | addr = (u64)entry->src_data[0] << 12; | |
86 | addr |= ((u64)entry->src_data[1] & 0xf) << 44; | |
87 | ||
88 | if (!amdgpu_sriov_vf(adev)) { | |
89 | /* | |
90 | * Issue a dummy read to wait for the status register to | |
91 | * be updated to avoid reading an incorrect value due to | |
92 | * the new fast GRBM interface. | |
93 | */ | |
94 | if (entry->vmid_src == AMDGPU_GFXHUB_0) | |
95 | RREG32(hub->vm_l2_pro_fault_status); | |
96 | ||
97 | status = RREG32(hub->vm_l2_pro_fault_status); | |
98 | WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); | |
99 | } | |
100 | ||
101 | if (printk_ratelimit()) { | |
102 | struct amdgpu_task_info task_info; | |
103 | ||
104 | memset(&task_info, 0, sizeof(struct amdgpu_task_info)); | |
105 | amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); | |
106 | ||
107 | dev_err(adev->dev, | |
108 | "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " | |
109 | "for process %s pid %d thread %s pid %d)\n", | |
110 | entry->vmid_src ? "mmhub" : "gfxhub", | |
111 | entry->src_id, entry->ring_id, entry->vmid, | |
112 | entry->pasid, task_info.process_name, task_info.tgid, | |
113 | task_info.task_name, task_info.pid); | |
114 | dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", | |
115 | addr, entry->client_id); | |
116 | if (!amdgpu_sriov_vf(adev)) | |
117 | hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); | |
118 | } | |
119 | ||
120 | return 0; | |
121 | } | |
122 | ||
123 | static const struct amdgpu_irq_src_funcs gmc_v11_0_irq_funcs = { | |
124 | .set = gmc_v11_0_vm_fault_interrupt_state, | |
125 | .process = gmc_v11_0_process_interrupt, | |
126 | }; | |
127 | ||
128 | static const struct amdgpu_irq_src_funcs gmc_v11_0_ecc_funcs = { | |
129 | .set = gmc_v11_0_ecc_interrupt_state, | |
130 | .process = amdgpu_umc_process_ecc_irq, | |
131 | }; | |
132 | ||
133 | static void gmc_v11_0_set_irq_funcs(struct amdgpu_device *adev) | |
134 | { | |
135 | adev->gmc.vm_fault.num_types = 1; | |
136 | adev->gmc.vm_fault.funcs = &gmc_v11_0_irq_funcs; | |
137 | ||
138 | if (!amdgpu_sriov_vf(adev)) { | |
139 | adev->gmc.ecc_irq.num_types = 1; | |
140 | adev->gmc.ecc_irq.funcs = &gmc_v11_0_ecc_funcs; | |
141 | } | |
142 | } | |
143 | ||
144 | /** | |
145 | * gmc_v11_0_use_invalidate_semaphore - judge whether to use semaphore | |
146 | * | |
147 | * @adev: amdgpu_device pointer | |
148 | * @vmhub: vmhub type | |
149 | * | |
150 | */ | |
151 | static bool gmc_v11_0_use_invalidate_semaphore(struct amdgpu_device *adev, | |
152 | uint32_t vmhub) | |
153 | { | |
154 | return ((vmhub == AMDGPU_MMHUB_0) && | |
155 | (!amdgpu_sriov_vf(adev))); | |
156 | } | |
157 | ||
3cc69021 | 158 | static bool gmc_v11_0_get_vmid_pasid_mapping_info( |
1c2014da TY |
159 | struct amdgpu_device *adev, |
160 | uint8_t vmid, uint16_t *p_pasid) | |
161 | { | |
3cc69021 | 162 | *p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff; |
1c2014da | 163 | |
3cc69021 | 164 | return !!(*p_pasid); |
1c2014da TY |
165 | } |
166 | ||
167 | /* | |
168 | * GART | |
169 | * VMID 0 is the physical GPU addresses as used by the kernel. | |
170 | * VMIDs 1-15 are used for userspace clients and are handled | |
171 | * by the amdgpu vm/hsa code. | |
172 | */ | |
173 | ||
174 | static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, | |
175 | unsigned int vmhub, uint32_t flush_type) | |
176 | { | |
177 | bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(adev, vmhub); | |
178 | struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; | |
179 | u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); | |
180 | u32 tmp; | |
181 | /* Use register 17 for GART */ | |
182 | const unsigned eng = 17; | |
183 | unsigned int i; | |
184 | ||
185 | spin_lock(&adev->gmc.invalidate_lock); | |
186 | /* | |
187 | * It may lose gpuvm invalidate acknowldege state across power-gating | |
188 | * off cycle, add semaphore acquire before invalidation and semaphore | |
189 | * release after invalidation to avoid entering power gated state | |
190 | * to WA the Issue | |
191 | */ | |
192 | ||
193 | /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ | |
194 | if (use_semaphore) { | |
195 | for (i = 0; i < adev->usec_timeout; i++) { | |
196 | /* a read return value of 1 means semaphore acuqire */ | |
197 | tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + | |
198 | hub->eng_distance * eng); | |
199 | if (tmp & 0x1) | |
200 | break; | |
201 | udelay(1); | |
202 | } | |
203 | ||
204 | if (i >= adev->usec_timeout) | |
205 | DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); | |
206 | } | |
207 | ||
208 | WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); | |
209 | ||
210 | /* Wait for ACK with a delay.*/ | |
211 | for (i = 0; i < adev->usec_timeout; i++) { | |
212 | tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + | |
213 | hub->eng_distance * eng); | |
214 | tmp &= 1 << vmid; | |
215 | if (tmp) | |
216 | break; | |
217 | ||
218 | udelay(1); | |
219 | } | |
220 | ||
221 | /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ | |
222 | if (use_semaphore) | |
223 | /* | |
224 | * add semaphore release after invalidation, | |
225 | * write with 0 means semaphore release | |
226 | */ | |
227 | WREG32_NO_KIQ(hub->vm_inv_eng0_sem + | |
228 | hub->eng_distance * eng, 0); | |
229 | ||
230 | /* Issue additional private vm invalidation to MMHUB */ | |
231 | if ((vmhub != AMDGPU_GFXHUB_0) && | |
232 | (hub->vm_l2_bank_select_reserved_cid2)) { | |
233 | inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); | |
234 | /* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */ | |
235 | inv_req |= (1 << 25); | |
236 | /* Issue private invalidation */ | |
237 | WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req); | |
238 | /* Read back to ensure invalidation is done*/ | |
239 | RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); | |
240 | } | |
241 | ||
242 | spin_unlock(&adev->gmc.invalidate_lock); | |
243 | ||
244 | if (i < adev->usec_timeout) | |
245 | return; | |
246 | ||
247 | DRM_ERROR("Timeout waiting for VM flush ACK!\n"); | |
248 | } | |
249 | ||
250 | /** | |
251 | * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback | |
252 | * | |
253 | * @adev: amdgpu_device pointer | |
254 | * @vmid: vm instance to flush | |
255 | * | |
256 | * Flush the TLB for the requested page table. | |
257 | */ | |
258 | static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, | |
259 | uint32_t vmhub, uint32_t flush_type) | |
260 | { | |
261 | if ((vmhub == AMDGPU_GFXHUB_0) && !adev->gfx.is_poweron) | |
262 | return; | |
263 | ||
264 | /* flush hdp cache */ | |
265 | adev->hdp.funcs->flush_hdp(adev, NULL); | |
266 | ||
267 | /* For SRIOV run time, driver shouldn't access the register through MMIO | |
268 | * Directly use kiq to do the vm invalidation instead | |
269 | */ | |
18ee4ce6 | 270 | if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes && |
1c2014da TY |
271 | (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { |
272 | struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; | |
273 | const unsigned eng = 17; | |
274 | u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); | |
275 | u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; | |
276 | u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; | |
277 | ||
278 | amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, | |
279 | 1 << vmid); | |
280 | return; | |
281 | } | |
282 | ||
283 | mutex_lock(&adev->mman.gtt_window_lock); | |
284 | gmc_v11_0_flush_vm_hub(adev, vmid, vmhub, 0); | |
285 | mutex_unlock(&adev->mman.gtt_window_lock); | |
286 | return; | |
287 | } | |
288 | ||
289 | /** | |
290 | * gmc_v11_0_flush_gpu_tlb_pasid - tlb flush via pasid | |
291 | * | |
292 | * @adev: amdgpu_device pointer | |
293 | * @pasid: pasid to be flush | |
294 | * | |
295 | * Flush the TLB for the requested pasid. | |
296 | */ | |
297 | static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, | |
298 | uint16_t pasid, uint32_t flush_type, | |
299 | bool all_hub) | |
300 | { | |
301 | int vmid, i; | |
302 | signed long r; | |
303 | uint32_t seq; | |
304 | uint16_t queried_pasid; | |
305 | bool ret; | |
306 | struct amdgpu_ring *ring = &adev->gfx.kiq.ring; | |
307 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |
308 | ||
309 | if (amdgpu_emu_mode == 0 && ring->sched.ready) { | |
310 | spin_lock(&adev->gfx.kiq.ring_lock); | |
311 | /* 2 dwords flush + 8 dwords fence */ | |
312 | amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); | |
313 | kiq->pmf->kiq_invalidate_tlbs(ring, | |
314 | pasid, flush_type, all_hub); | |
315 | r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); | |
316 | if (r) { | |
317 | amdgpu_ring_undo(ring); | |
318 | spin_unlock(&adev->gfx.kiq.ring_lock); | |
319 | return -ETIME; | |
320 | } | |
321 | ||
322 | amdgpu_ring_commit(ring); | |
323 | spin_unlock(&adev->gfx.kiq.ring_lock); | |
324 | r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); | |
325 | if (r < 1) { | |
326 | dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); | |
327 | return -ETIME; | |
328 | } | |
329 | ||
330 | return 0; | |
331 | } | |
332 | ||
333 | for (vmid = 1; vmid < 16; vmid++) { | |
334 | ||
3cc69021 | 335 | ret = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid, |
1c2014da TY |
336 | &queried_pasid); |
337 | if (ret && queried_pasid == pasid) { | |
338 | if (all_hub) { | |
339 | for (i = 0; i < adev->num_vmhubs; i++) | |
340 | gmc_v11_0_flush_gpu_tlb(adev, vmid, | |
341 | i, flush_type); | |
342 | } else { | |
343 | gmc_v11_0_flush_gpu_tlb(adev, vmid, | |
344 | AMDGPU_GFXHUB_0, flush_type); | |
345 | } | |
346 | break; | |
347 | } | |
348 | } | |
349 | ||
350 | return 0; | |
351 | } | |
352 | ||
353 | static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, | |
354 | unsigned vmid, uint64_t pd_addr) | |
355 | { | |
356 | bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); | |
357 | struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; | |
358 | uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); | |
359 | unsigned eng = ring->vm_inv_eng; | |
360 | ||
361 | /* | |
362 | * It may lose gpuvm invalidate acknowldege state across power-gating | |
363 | * off cycle, add semaphore acquire before invalidation and semaphore | |
364 | * release after invalidation to avoid entering power gated state | |
365 | * to WA the Issue | |
366 | */ | |
367 | ||
368 | /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ | |
369 | if (use_semaphore) | |
370 | /* a read return value of 1 means semaphore acuqire */ | |
371 | amdgpu_ring_emit_reg_wait(ring, | |
372 | hub->vm_inv_eng0_sem + | |
373 | hub->eng_distance * eng, 0x1, 0x1); | |
374 | ||
375 | amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + | |
376 | (hub->ctx_addr_distance * vmid), | |
377 | lower_32_bits(pd_addr)); | |
378 | ||
379 | amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + | |
380 | (hub->ctx_addr_distance * vmid), | |
381 | upper_32_bits(pd_addr)); | |
382 | ||
383 | amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + | |
384 | hub->eng_distance * eng, | |
385 | hub->vm_inv_eng0_ack + | |
386 | hub->eng_distance * eng, | |
387 | req, 1 << vmid); | |
388 | ||
389 | /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ | |
390 | if (use_semaphore) | |
391 | /* | |
392 | * add semaphore release after invalidation, | |
393 | * write with 0 means semaphore release | |
394 | */ | |
395 | amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + | |
396 | hub->eng_distance * eng, 0); | |
397 | ||
398 | return pd_addr; | |
399 | } | |
400 | ||
401 | static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, | |
402 | unsigned pasid) | |
403 | { | |
404 | struct amdgpu_device *adev = ring->adev; | |
405 | uint32_t reg; | |
406 | ||
18ee4ce6 JX |
407 | /* MES fw manages IH_VMID_x_LUT updating */ |
408 | if (ring->is_mes_queue) | |
409 | return; | |
410 | ||
1c2014da TY |
411 | if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) |
412 | reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; | |
413 | else | |
414 | reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid; | |
415 | ||
416 | amdgpu_ring_emit_wreg(ring, reg, pasid); | |
417 | } | |
418 | ||
419 | /* | |
420 | * PTE format: | |
421 | * 63:59 reserved | |
422 | * 58:57 reserved | |
423 | * 56 F | |
424 | * 55 L | |
425 | * 54 reserved | |
426 | * 53:52 SW | |
427 | * 51 T | |
428 | * 50:48 mtype | |
429 | * 47:12 4k physical page base address | |
430 | * 11:7 fragment | |
431 | * 6 write | |
432 | * 5 read | |
433 | * 4 exe | |
434 | * 3 Z | |
435 | * 2 snooped | |
436 | * 1 system | |
437 | * 0 valid | |
438 | * | |
439 | * PDE format: | |
440 | * 63:59 block fragment size | |
441 | * 58:55 reserved | |
442 | * 54 P | |
443 | * 53:48 reserved | |
444 | * 47:6 physical base address of PD or PTE | |
445 | * 5:3 reserved | |
446 | * 2 C | |
447 | * 1 system | |
448 | * 0 valid | |
449 | */ | |
450 | ||
451 | static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) | |
452 | { | |
453 | switch (flags) { | |
454 | case AMDGPU_VM_MTYPE_DEFAULT: | |
455 | return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); | |
456 | case AMDGPU_VM_MTYPE_NC: | |
457 | return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); | |
458 | case AMDGPU_VM_MTYPE_WC: | |
459 | return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); | |
460 | case AMDGPU_VM_MTYPE_CC: | |
461 | return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); | |
462 | case AMDGPU_VM_MTYPE_UC: | |
463 | return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); | |
464 | default: | |
465 | return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); | |
466 | } | |
467 | } | |
468 | ||
469 | static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level, | |
470 | uint64_t *addr, uint64_t *flags) | |
471 | { | |
472 | if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM)) | |
473 | *addr = adev->vm_manager.vram_base_offset + *addr - | |
474 | adev->gmc.vram_start; | |
475 | BUG_ON(*addr & 0xFFFF00000000003FULL); | |
476 | ||
477 | if (!adev->gmc.translate_further) | |
478 | return; | |
479 | ||
480 | if (level == AMDGPU_VM_PDB1) { | |
481 | /* Set the block fragment size */ | |
482 | if (!(*flags & AMDGPU_PDE_PTE)) | |
483 | *flags |= AMDGPU_PDE_BFS(0x9); | |
484 | ||
485 | } else if (level == AMDGPU_VM_PDB0) { | |
486 | if (*flags & AMDGPU_PDE_PTE) | |
487 | *flags &= ~AMDGPU_PDE_PTE; | |
488 | else | |
489 | *flags |= AMDGPU_PTE_TF; | |
490 | } | |
491 | } | |
492 | ||
493 | static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, | |
494 | struct amdgpu_bo_va_mapping *mapping, | |
495 | uint64_t *flags) | |
496 | { | |
497 | *flags &= ~AMDGPU_PTE_EXECUTABLE; | |
498 | *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; | |
499 | ||
500 | *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; | |
501 | *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); | |
502 | ||
503 | if (mapping->flags & AMDGPU_PTE_PRT) { | |
504 | *flags |= AMDGPU_PTE_PRT; | |
505 | *flags |= AMDGPU_PTE_SNOOPED; | |
506 | *flags |= AMDGPU_PTE_LOG; | |
507 | *flags |= AMDGPU_PTE_SYSTEM; | |
508 | *flags &= ~AMDGPU_PTE_VALID; | |
509 | } | |
510 | } | |
511 | ||
/* VBIOS framebuffer size callback; this implementation always reports 0. */
static unsigned gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	return 0;
}
516 | ||
517 | static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = { | |
518 | .flush_gpu_tlb = gmc_v11_0_flush_gpu_tlb, | |
519 | .flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid, | |
520 | .emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb, | |
521 | .emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping, | |
522 | .map_mtype = gmc_v11_0_map_mtype, | |
523 | .get_vm_pde = gmc_v11_0_get_vm_pde, | |
524 | .get_vm_pte = gmc_v11_0_get_vm_pte, | |
525 | .get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size, | |
526 | }; | |
527 | ||
528 | static void gmc_v11_0_set_gmc_funcs(struct amdgpu_device *adev) | |
529 | { | |
530 | adev->gmc.gmc_funcs = &gmc_v11_0_gmc_funcs; | |
531 | } | |
532 | ||
533 | static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) | |
534 | { | |
535 | switch (adev->ip_versions[UMC_HWIP][0]) { | |
536 | case IP_VERSION(8, 10, 0): | |
89ae779b | 537 | case IP_VERSION(8, 11, 0): |
1c2014da TY |
538 | break; |
539 | default: | |
540 | break; | |
541 | } | |
542 | } | |
543 | ||
544 | ||
545 | static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) | |
546 | { | |
f40fc191 HZ |
547 | switch (adev->ip_versions[MMHUB_HWIP][0]) { |
548 | case IP_VERSION(3, 0, 2): | |
549 | adev->mmhub.funcs = &mmhub_v3_0_2_funcs; | |
550 | break; | |
551 | default: | |
552 | adev->mmhub.funcs = &mmhub_v3_0_funcs; | |
553 | break; | |
554 | } | |
1c2014da TY |
555 | } |
556 | ||
557 | static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) | |
558 | { | |
559 | adev->gfxhub.funcs = &gfxhub_v3_0_funcs; | |
560 | } | |
561 | ||
562 | static int gmc_v11_0_early_init(void *handle) | |
563 | { | |
564 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | |
565 | ||
566 | gmc_v11_0_set_gfxhub_funcs(adev); | |
567 | gmc_v11_0_set_mmhub_funcs(adev); | |
568 | gmc_v11_0_set_gmc_funcs(adev); | |
569 | gmc_v11_0_set_irq_funcs(adev); | |
570 | gmc_v11_0_set_umc_funcs(adev); | |
571 | ||
572 | adev->gmc.shared_aperture_start = 0x2000000000000000ULL; | |
573 | adev->gmc.shared_aperture_end = | |
574 | adev->gmc.shared_aperture_start + (4ULL << 30) - 1; | |
575 | adev->gmc.private_aperture_start = 0x1000000000000000ULL; | |
576 | adev->gmc.private_aperture_end = | |
577 | adev->gmc.private_aperture_start + (4ULL << 30) - 1; | |
578 | ||
579 | return 0; | |
580 | } | |
581 | ||
582 | static int gmc_v11_0_late_init(void *handle) | |
583 | { | |
584 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | |
585 | int r; | |
586 | ||
587 | r = amdgpu_gmc_allocate_vm_inv_eng(adev); | |
588 | if (r) | |
589 | return r; | |
590 | ||
591 | r = amdgpu_gmc_ras_late_init(adev); | |
592 | if (r) | |
593 | return r; | |
594 | ||
595 | return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); | |
596 | } | |
597 | ||
598 | static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, | |
599 | struct amdgpu_gmc *mc) | |
600 | { | |
601 | u64 base = 0; | |
602 | ||
603 | base = adev->mmhub.funcs->get_fb_location(adev); | |
604 | ||
605 | amdgpu_gmc_vram_location(adev, &adev->gmc, base); | |
606 | amdgpu_gmc_gart_location(adev, mc); | |
607 | ||
608 | /* base offset of vram pages */ | |
609 | adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev); | |
610 | } | |
611 | ||
612 | /** | |
613 | * gmc_v11_0_mc_init - initialize the memory controller driver params | |
614 | * | |
615 | * @adev: amdgpu_device pointer | |
616 | * | |
617 | * Look up the amount of vram, vram width, and decide how to place | |
618 | * vram and gart within the GPU's physical address space. | |
619 | * Returns 0 for success. | |
620 | */ | |
621 | static int gmc_v11_0_mc_init(struct amdgpu_device *adev) | |
622 | { | |
623 | int r; | |
624 | ||
625 | /* size in MB on si */ | |
626 | adev->gmc.mc_vram_size = | |
627 | adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; | |
628 | adev->gmc.real_vram_size = adev->gmc.mc_vram_size; | |
629 | ||
630 | if (!(adev->flags & AMD_IS_APU)) { | |
631 | r = amdgpu_device_resize_fb_bar(adev); | |
632 | if (r) | |
633 | return r; | |
634 | } | |
635 | adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); | |
636 | adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); | |
637 | ||
638 | /* In case the PCI BAR is larger than the actual amount of vram */ | |
639 | adev->gmc.visible_vram_size = adev->gmc.aper_size; | |
640 | if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) | |
641 | adev->gmc.visible_vram_size = adev->gmc.real_vram_size; | |
642 | ||
643 | /* set the gart size */ | |
644 | if (amdgpu_gart_size == -1) { | |
645 | adev->gmc.gart_size = 512ULL << 20; | |
646 | } else | |
647 | adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; | |
648 | ||
649 | gmc_v11_0_vram_gtt_location(adev, &adev->gmc); | |
650 | ||
651 | return 0; | |
652 | } | |
653 | ||
654 | static int gmc_v11_0_gart_init(struct amdgpu_device *adev) | |
655 | { | |
656 | int r; | |
657 | ||
658 | if (adev->gart.bo) { | |
659 | WARN(1, "PCIE GART already initialized\n"); | |
660 | return 0; | |
661 | } | |
662 | ||
663 | /* Initialize common gart structure */ | |
664 | r = amdgpu_gart_init(adev); | |
665 | if (r) | |
666 | return r; | |
667 | ||
668 | adev->gart.table_size = adev->gart.num_gpu_pages * 8; | |
669 | adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) | | |
670 | AMDGPU_PTE_EXECUTABLE; | |
671 | ||
672 | return amdgpu_gart_table_vram_alloc(adev); | |
673 | } | |
674 | ||
675 | static int gmc_v11_0_sw_init(void *handle) | |
676 | { | |
677 | int r, vram_width = 0, vram_type = 0, vram_vendor = 0; | |
678 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | |
679 | ||
680 | adev->mmhub.funcs->init(adev); | |
681 | ||
682 | spin_lock_init(&adev->gmc.invalidate_lock); | |
683 | ||
684 | r = amdgpu_atomfirmware_get_vram_info(adev, | |
685 | &vram_width, &vram_type, &vram_vendor); | |
686 | adev->gmc.vram_width = vram_width; | |
687 | ||
688 | adev->gmc.vram_type = vram_type; | |
689 | adev->gmc.vram_vendor = vram_vendor; | |
690 | ||
691 | switch (adev->ip_versions[GC_HWIP][0]) { | |
692 | case IP_VERSION(11, 0, 0): | |
ee367aed | 693 | case IP_VERSION(11, 0, 1): |
f2754bf7 | 694 | case IP_VERSION(11, 0, 2): |
1c2014da TY |
695 | adev->num_vmhubs = 2; |
696 | /* | |
697 | * To fulfill 4-level page support, | |
698 | * vm size is 256TB (48bit), maximum size, | |
699 | * block size 512 (9bit) | |
700 | */ | |
701 | amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); | |
702 | break; | |
703 | default: | |
704 | break; | |
705 | } | |
706 | ||
707 | /* This interrupt is VMC page fault.*/ | |
708 | r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC, | |
709 | VMC_1_0__SRCID__VM_FAULT, | |
710 | &adev->gmc.vm_fault); | |
711 | ||
712 | if (r) | |
713 | return r; | |
714 | ||
715 | r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, | |
716 | UTCL2_1_0__SRCID__FAULT, | |
717 | &adev->gmc.vm_fault); | |
718 | if (r) | |
719 | return r; | |
720 | ||
721 | if (!amdgpu_sriov_vf(adev)) { | |
722 | /* interrupt sent to DF. */ | |
723 | r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0, | |
724 | &adev->gmc.ecc_irq); | |
725 | if (r) | |
726 | return r; | |
727 | } | |
728 | ||
729 | /* | |
730 | * Set the internal MC address mask This is the max address of the GPU's | |
731 | * internal address space. | |
732 | */ | |
733 | adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ | |
734 | ||
735 | r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); | |
736 | if (r) { | |
737 | printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); | |
738 | return r; | |
739 | } | |
740 | ||
741 | r = gmc_v11_0_mc_init(adev); | |
742 | if (r) | |
743 | return r; | |
744 | ||
745 | amdgpu_gmc_get_vbios_allocations(adev); | |
746 | ||
747 | /* Memory manager */ | |
748 | r = amdgpu_bo_init(adev); | |
749 | if (r) | |
750 | return r; | |
751 | ||
752 | r = gmc_v11_0_gart_init(adev); | |
753 | if (r) | |
754 | return r; | |
755 | ||
756 | /* | |
757 | * number of VMs | |
758 | * VMID 0 is reserved for System | |
759 | * amdgpu graphics/compute will use VMIDs 1-7 | |
760 | * amdkfd will use VMIDs 8-15 | |
761 | */ | |
762 | adev->vm_manager.first_kfd_vmid = 8; | |
763 | ||
764 | amdgpu_vm_manager_init(adev); | |
765 | ||
766 | return 0; | |
767 | } | |
768 | ||
/**
 * gmc_v11_0_gart_fini - vm fini callback
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the GART table that was allocated in VRAM.
 */
static void gmc_v11_0_gart_fini(struct amdgpu_device *adev)
{
	amdgpu_gart_table_vram_free(adev);
}
780 | ||
/*
 * sw_fini IP callback: tears down the VM manager, the GART table, any
 * remaining GEM objects and the buffer manager, in that order.
 * Always returns 0.
 */
static int gmc_v11_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_vm_manager_fini(adev);
	gmc_v11_0_gart_fini(adev);

	amdgpu_gem_force_release(adev);
	amdgpu_bo_fini(adev);

	return 0;
}
792 | ||
/* Golden register setup hook; intentionally empty in this implementation. */
static void gmc_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
}
796 | ||
797 | /** | |
798 | * gmc_v11_0_gart_enable - gart enable | |
799 | * | |
800 | * @adev: amdgpu_device pointer | |
801 | */ | |
802 | static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) | |
803 | { | |
804 | int r; | |
805 | bool value; | |
806 | ||
807 | if (adev->gart.bo == NULL) { | |
808 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); | |
809 | return -EINVAL; | |
810 | } | |
811 | ||
812 | amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); | |
18ee4ce6 | 813 | |
1c2014da TY |
814 | r = adev->mmhub.funcs->gart_enable(adev); |
815 | if (r) | |
816 | return r; | |
817 | ||
818 | /* Flush HDP after it is initialized */ | |
819 | adev->hdp.funcs->flush_hdp(adev, NULL); | |
820 | ||
821 | value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? | |
822 | false : true; | |
823 | ||
824 | adev->mmhub.funcs->set_fault_enable_default(adev, value); | |
825 | gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); | |
826 | ||
827 | DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", | |
828 | (unsigned)(adev->gmc.gart_size >> 20), | |
829 | (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); | |
830 | ||
831 | return 0; | |
832 | } | |
833 | ||
834 | static int gmc_v11_0_hw_init(void *handle) | |
835 | { | |
836 | int r; | |
837 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | |
838 | ||
839 | /* The sequence of these two function calls matters.*/ | |
840 | gmc_v11_0_init_golden_registers(adev); | |
841 | ||
842 | r = gmc_v11_0_gart_enable(adev); | |
843 | if (r) | |
844 | return r; | |
845 | ||
846 | if (adev->umc.funcs && adev->umc.funcs->init_registers) | |
847 | adev->umc.funcs->init_registers(adev); | |
848 | ||
849 | return 0; | |
850 | } | |
851 | ||
852 | /** | |
853 | * gmc_v11_0_gart_disable - gart disable | |
854 | * | |
855 | * @adev: amdgpu_device pointer | |
856 | * | |
857 | * This disables all VM page table. | |
858 | */ | |
859 | static void gmc_v11_0_gart_disable(struct amdgpu_device *adev) | |
860 | { | |
861 | adev->mmhub.funcs->gart_disable(adev); | |
862 | } | |
863 | ||
864 | static int gmc_v11_0_hw_fini(void *handle) | |
865 | { | |
866 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | |
867 | ||
868 | if (amdgpu_sriov_vf(adev)) { | |
869 | /* full access mode, so don't touch any GMC register */ | |
870 | DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); | |
871 | return 0; | |
872 | } | |
873 | ||
874 | amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); | |
875 | amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); | |
876 | gmc_v11_0_gart_disable(adev); | |
877 | ||
878 | return 0; | |
879 | } | |
880 | ||
/*
 * suspend IP callback: runs the hw_fini sequence. The result of hw_fini
 * is not propagated; 0 is always returned.
 */
static int gmc_v11_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v11_0_hw_fini(adev);

	return 0;
}
889 | ||
/*
 * resume IP callback: re-runs hw_init and, on success, resets all VMIDs.
 * Returns the hw_init error, or 0.
 */
static int gmc_v11_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r = gmc_v11_0_hw_init(adev);

	if (r)
		return r;

	amdgpu_vmid_reset_all(adev);

	return 0;
}
903 | ||
904 | static bool gmc_v11_0_is_idle(void *handle) | |
905 | { | |
906 | /* MC is always ready in GMC v11.*/ | |
907 | return true; | |
908 | } | |
909 | ||
/* wait_for_idle IP callback: no waiting is needed in GMC v11, returns 0. */
static int gmc_v11_0_wait_for_idle(void *handle)
{
	return 0;
}
915 | ||
/* soft_reset IP callback: nothing is reset here, returns 0. */
static int gmc_v11_0_soft_reset(void *handle)
{
	return 0;
}
920 | ||
921 | static int gmc_v11_0_set_clockgating_state(void *handle, | |
922 | enum amd_clockgating_state state) | |
923 | { | |
924 | int r; | |
925 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | |
926 | ||
927 | r = adev->mmhub.funcs->set_clockgating(adev, state); | |
928 | if (r) | |
929 | return r; | |
930 | ||
931 | return athub_v3_0_set_clockgating(adev, state); | |
932 | } | |
933 | ||
934 | static void gmc_v11_0_get_clockgating_state(void *handle, u64 *flags) | |
935 | { | |
936 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | |
937 | ||
938 | adev->mmhub.funcs->get_clockgating(adev, flags); | |
939 | ||
940 | athub_v3_0_get_clockgating(adev, flags); | |
941 | } | |
942 | ||
943 | static int gmc_v11_0_set_powergating_state(void *handle, | |
944 | enum amd_powergating_state state) | |
945 | { | |
946 | return 0; | |
947 | } | |
948 | ||
949 | const struct amd_ip_funcs gmc_v11_0_ip_funcs = { | |
950 | .name = "gmc_v11_0", | |
951 | .early_init = gmc_v11_0_early_init, | |
952 | .sw_init = gmc_v11_0_sw_init, | |
953 | .hw_init = gmc_v11_0_hw_init, | |
954 | .late_init = gmc_v11_0_late_init, | |
955 | .sw_fini = gmc_v11_0_sw_fini, | |
956 | .hw_fini = gmc_v11_0_hw_fini, | |
957 | .suspend = gmc_v11_0_suspend, | |
958 | .resume = gmc_v11_0_resume, | |
959 | .is_idle = gmc_v11_0_is_idle, | |
960 | .wait_for_idle = gmc_v11_0_wait_for_idle, | |
961 | .soft_reset = gmc_v11_0_soft_reset, | |
962 | .set_clockgating_state = gmc_v11_0_set_clockgating_state, | |
963 | .set_powergating_state = gmc_v11_0_set_powergating_state, | |
964 | .get_clockgating_state = gmc_v11_0_get_clockgating_state, | |
965 | }; | |
966 | ||
967 | const struct amdgpu_ip_block_version gmc_v11_0_ip_block = { | |
968 | .type = AMD_IP_BLOCK_TYPE_GMC, | |
969 | .major = 11, | |
970 | .minor = 0, | |
971 | .rev = 0, | |
972 | .funcs = &gmc_v11_0_ip_funcs, | |
973 | }; |