1 // SPDX-License-Identifier: MIT
3 * Copyright © 2020 Intel Corporation
6 #include <linux/types.h>
7 #include <asm/set_memory.h>
10 #include <drm/i915_drm.h>
12 #include "gem/i915_gem_lmem.h"
15 #include "intel_gt_gmch.h"
16 #include "intel_gt_regs.h"
18 #include "i915_scatterlist.h"
19 #include "i915_utils.h"
20 #include "i915_vgpu.h"
22 #include "intel_gtt.h"
23 #include "gen8_ppgtt.h"
25 static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
/*
 * Widen the requested hole so that a scratch guard page separates nodes of
 * differing cache "colour".  NOTE(review): this view of the file is gapped;
 * the remaining parameters and some body lines are not visible here.
 */
/* If our predecessor's colour differs, leave one GTT page before us. */
30 if (i915_node_color_differs(node, color))
31 *start += I915_GTT_PAGE_SIZE;
34 * Also leave a space between the unallocated reserved node after the
35 * GTT and any objects within the GTT, i.e. we use the color adjustment
36 * to insert a guard page to prevent prefetches crossing over the
/* Likewise check the node that follows us in the drm_mm node list. */
39 node = list_next_entry(node, node_list);
40 if (node->color != color)
41 *end -= I915_GTT_PAGE_SIZE;
/*
 * ggtt_init_hw - one-time software setup for a probed GGTT: address-space
 * init, read-only capability, colour adjust, WC iomap of the aperture, MTRR
 * and fence registers.  NOTE(review): this view of the file is gapped; error
 * paths and the return statement are not visible here.
 */
44 static int ggtt_init_hw(struct i915_ggtt *ggtt)
46 struct drm_i915_private *i915 = ggtt->vm.i915;
48 i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
50 ggtt->vm.is_ggtt = true;
52 /* Only VLV supports read-only GGTT mappings */
53 ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
/* Without LLC or PPGTT we rely on colouring to insert guard pages. */
55 if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
56 ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
/* Map the mappable aperture write-combined; clean up the vm on failure. */
58 if (ggtt->mappable_end) {
59 if (!io_mapping_init_wc(&ggtt->iomap,
61 ggtt->mappable_end)) {
62 ggtt->vm.cleanup(&ggtt->vm);
66 ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
70 intel_ggtt_init_fences(ggtt);
76 * i915_ggtt_init_hw - Initialize GGTT hardware
/* @i915: device private.  Thin wrapper forwarding to ggtt_init_hw() for the
 * primary GT's GGTT; gapped view — the declaration of 'ret' and the return
 * are not visible here.
 */
79 int i915_ggtt_init_hw(struct drm_i915_private *i915)
84 * Note that we use page colouring to enforce a guard page at the
85 * end of the address space. This is required as the CS may prefetch
86 * beyond the end of the batch buffer, across the page boundary,
87 * and beyond the end of the GTT if we do not provide a guard.
89 ret = ggtt_init_hw(to_gt(i915)->ggtt);
97 * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
98 * @vm: The VM to suspend the mappings for
100 * Suspend the memory mappings for all objects mapped to HW via the GGTT or a
103 void i915_ggtt_suspend_vm(struct i915_address_space *vm)
105 struct i915_vma *vma, *vn;
106 int save_skip_rewrite;
/* Only valid for a GGTT or DPT address space. */
108 drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
111 i915_gem_drain_freed_objects(vm->i915);
113 mutex_lock(&vm->mutex);
116 * Skip rewriting PTE on VMA unbind.
117 * FIXME: Use an argument to i915_vma_unbind() instead?
119 save_skip_rewrite = vm->skip_pte_rewrite;
120 vm->skip_pte_rewrite = true;
/* Evict every bound, unpinned, globally-bound VMA before suspend. */
122 list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
123 struct drm_i915_gem_object *obj = vma->obj;
125 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
127 if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
130 /* unlikely to race when GPU is idle, so no worry about slowpath.. */
131 if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
133 * No dead objects should appear here, GPU should be
134 * completely idle, and userspace suspended
136 i915_gem_object_get(obj);
/* Drop the vm mutex to take the object lock sleeping, then unbind. */
138 mutex_unlock(&vm->mutex);
140 i915_gem_object_lock(obj, NULL);
141 GEM_WARN_ON(i915_vma_unbind(vma));
142 i915_gem_object_unlock(obj);
143 i915_gem_object_put(obj);
145 vm->skip_pte_rewrite = save_skip_rewrite;
/* Already fully unbound (or bind still in flight): wait, then evict. */
149 if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
150 i915_vma_wait_for_bind(vma);
152 __i915_vma_evict(vma, false);
153 drm_mm_remove_node(&vma->node);
156 i915_gem_object_unlock(obj);
/* Point the whole range at scratch so nothing dangles over suspend. */
159 vm->clear_range(vm, 0, vm->total);
161 vm->skip_pte_rewrite = save_skip_rewrite;
163 mutex_unlock(&vm->mutex);
/*
 * i915_ggtt_suspend - suspend the device's GGTT mappings, flush the TLB via
 * ggtt->invalidate, and clear any stale GT faults.
 */
166 void i915_ggtt_suspend(struct i915_ggtt *ggtt)
168 i915_ggtt_suspend_vm(&ggtt->vm);
169 ggtt->invalidate(ggtt);
171 intel_gt_check_and_clear_faults(ggtt->vm.gt);
/*
 * gen6_ggtt_invalidate - flush GGTT TLBs on gen6-style hardware by writing
 * GFX_FLSH_CNTL_GEN6; the read-back posts the write, all under uncore->lock.
 */
174 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
176 struct intel_uncore *uncore = ggtt->vm.gt->uncore;
178 spin_lock_irq(&uncore->lock);
179 intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
/* Posting read to make sure the flush write reached the hardware. */
180 intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
181 spin_unlock_irq(&uncore->lock);
/*
 * gen8_ggtt_invalidate - flush GGTT TLBs on gen8+; a single uncached mmio
 * write suffices, no lock or posting read needed.
 */
184 void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
186 struct intel_uncore *uncore = ggtt->vm.gt->uncore;
189 * Note that as an uncached mmio write, this will flush the
190 * WCB of the writes into the GGTT before it triggers the invalidate.
192 intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
/*
 * guc_ggtt_invalidate - GGTT TLB flush when GuC is in use: do the normal
 * gen8 flush, then poke the GuC's own TLB-invalidate register (gen12+ uses
 * GEN12_GUC_TLB_INV_CR, older parts GEN8_GTCR).  NOTE(review): gapped view —
 * the else linking the two register writes is presumed but not visible.
 */
195 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
197 struct intel_uncore *uncore = ggtt->vm.gt->uncore;
198 struct drm_i915_private *i915 = ggtt->vm.i915;
200 gen8_ggtt_invalidate(ggtt);
202 if (GRAPHICS_VER(i915) >= 12)
203 intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
204 GEN12_GUC_TLB_INV_CR_INVALIDATE);
206 intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
/*
 * gen8_ggtt_pte_encode - build a gen8+ GGTT PTE: the DMA address OR'd with
 * the present bit, plus GEN12_GGTT_PTE_LM for local-memory pages.
 * NOTE(review): gapped view — the flags parameter and lmem test guarding
 * line 216 are not visible here.
 */
209 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
210 enum i915_cache_level level,
213 gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
216 pte |= GEN12_GGTT_PTE_LM;
/*
 * intel_ggtt_bind_vma - insert a VMA's pages into the GGTT.  Skips the
 * insert when the requested bind flags are already present; GGTT pages are
 * always bound at I915_GTT_PAGE_SIZE granularity.
 */
221 void intel_ggtt_bind_vma(struct i915_address_space *vm,
222 struct i915_vm_pt_stash *stash,
223 struct i915_vma_resource *vma_res,
224 enum i915_cache_level cache_level,
/* Nothing to do if all the requested bind flags are already set. */
229 if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
232 vma_res->bound_flags |= flags;
234 /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
236 if (vma_res->bi.readonly)
237 pte_flags |= PTE_READ_ONLY;
238 if (vma_res->bi.lmem)
241 vm->insert_entries(vm, vma_res, cache_level, pte_flags);
242 vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
/*
 * intel_ggtt_unbind_vma - point the VMA's GGTT range back at scratch pages.
 */
245 void intel_ggtt_unbind_vma(struct i915_address_space *vm,
246 struct i915_vma_resource *vma_res)
248 vm->clear_range(vm, vma_res->start, vma_res->vma_size);
/*
 * ggtt_reserve_guc_top - reserve the GGTT range above GUC_GGTT_TOP, which
 * GuC cannot address, so no shared object ever lands there.  No-op when GuC
 * is not in use.  Failure is only logged at dbg level (non-fatal here).
 */
251 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
256 if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
259 GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
260 size = ggtt->vm.total - GUC_GGTT_TOP;
262 ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
263 GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
266 drm_dbg(&ggtt->vm.i915->drm,
267 "Failed to reserve top of GGTT for GuC\n");
/*
 * ggtt_release_guc_top - undo ggtt_reserve_guc_top(); safe to call even if
 * the reservation was never made (drm_mm_node_allocated() guards it).
 */
272 static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
274 if (drm_mm_node_allocated(&ggtt->uc_fw))
275 drm_mm_remove_node(&ggtt->uc_fw);
/*
 * cleanup_init_ggtt - unwind init_ggtt(): drop the GuC-top reservation, the
 * error-capture node (if reserved) and its mutex.
 */
278 static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
280 ggtt_release_guc_top(ggtt);
281 if (drm_mm_node_allocated(&ggtt->error_capture))
282 drm_mm_remove_node(&ggtt->error_capture);
283 mutex_destroy(&ggtt->error_mutex);
/*
 * init_ggtt - carve up the GGTT address space: compute the pin bias above
 * WOPCM, balloon for vGPU, reserve an error-capture page and the GuC-top
 * hole, then scrub all remaining holes and the trailing guard page.
 * NOTE(review): gapped view — several error-handling lines are not visible.
 */
286 static int init_ggtt(struct i915_ggtt *ggtt)
289 * Let GEM Manage all of the aperture.
291 * However, leave one page at the end still bound to the scratch page.
292 * There are a number of places where the hardware apparently prefetches
293 * past the end of the object, and we've seen multiple hangs with the
294 * GPU head pointer stuck in a batchbuffer bound at the last page of the
295 * aperture. One page should be enough to keep any prefetching inside
298 unsigned long hole_start, hole_end;
299 struct drm_mm_node *entry;
303 * GuC requires all resources that we're sharing with it to be placed in
304 * non-WOPCM memory. If GuC is not present or not in use we still need a
305 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
308 ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
309 intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
/* Balloon out ranges owned by the host when running as a vGPU. */
311 ret = intel_vgt_balloon(ggtt);
315 mutex_init(&ggtt->error_mutex);
316 if (ggtt->mappable_end) {
318 * Reserve a mappable slot for our lockless error capture.
320 * We strongly prefer taking address 0x0 in order to protect
321 * other critical buffers against accidental overwrites,
322 * as writing to address 0 is a very common mistake.
324 * Since 0 may already be in use by the system (e.g. the BIOS
325 * framebuffer), we let the reservation fail quietly and hope
326 * 0 remains reserved always.
328 * If we fail to reserve 0, and then fail to find any space
329 * for an error-capture, remain silent. We can afford not
330 * to reserve an error_capture node as we have fallback
331 * paths, and we trust that 0 will remain reserved. However,
332 * the only likely reason for failure to insert is a driver
333 * bug, which we expect to cause other failures...
335 ggtt->error_capture.size = I915_GTT_PAGE_SIZE;
336 ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
/* Try exactly address 0 first; fall back to any mappable slot. */
337 if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
338 drm_mm_insert_node_in_range(&ggtt->vm.mm,
339 &ggtt->error_capture,
340 ggtt->error_capture.size, 0,
341 ggtt->error_capture.color,
342 0, ggtt->mappable_end,
345 if (drm_mm_node_allocated(&ggtt->error_capture))
346 drm_dbg(&ggtt->vm.i915->drm,
347 "Reserved GGTT:[%llx, %llx] for use by error capture\n",
348 ggtt->error_capture.start,
349 ggtt->error_capture.start + ggtt->error_capture.size);
352 * The upper portion of the GuC address space has a sizeable hole
353 * (several MB) that is inaccessible by GuC. Reserve this range within
354 * GGTT as it can comfortably hold GuC/HuC firmware images.
356 ret = ggtt_reserve_guc_top(ggtt);
360 /* Clear any non-preallocated blocks */
361 drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
362 drm_dbg(&ggtt->vm.i915->drm,
363 "clearing unused GTT space: [%lx, %lx]\n",
364 hole_start, hole_end);
365 ggtt->vm.clear_range(&ggtt->vm, hole_start,
366 hole_end - hole_start);
369 /* And finally clear the reserved guard page */
370 ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
/* Error path: undo the partial reservations made above. */
375 cleanup_init_ggtt(ggtt);
/*
 * aliasing_gtt_bind_vma - bind a VMA when an aliasing PPGTT shadows the
 * GGTT: LOCAL_BIND goes into the alias PPGTT, GLOBAL_BIND into the GGTT
 * itself; both may be requested at once.
 */
379 static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
380 struct i915_vm_pt_stash *stash,
381 struct i915_vma_resource *vma_res,
382 enum i915_cache_level cache_level,
387 /* Currently applicable only to VLV */
389 if (vma_res->bi.readonly)
390 pte_flags |= PTE_READ_ONLY;
392 if (flags & I915_VMA_LOCAL_BIND)
393 ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
394 stash, vma_res, cache_level, flags);
396 if (flags & I915_VMA_GLOBAL_BIND)
397 vm->insert_entries(vm, vma_res, cache_level, pte_flags);
399 vma_res->bound_flags |= flags;
/*
 * aliasing_gtt_unbind_vma - mirror of aliasing_gtt_bind_vma(): clear the
 * GGTT range for a global bind and/or unbind from the alias PPGTT for a
 * local bind, according to the recorded bound_flags.
 */
402 static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
403 struct i915_vma_resource *vma_res)
405 if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
406 vm->clear_range(vm, vma_res->start, vma_res->vma_size);
408 if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
409 ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
/*
 * init_aliasing_ppgtt - create a full-size PPGTT that aliases the GGTT and
 * preallocate all of its page tables, then swap the GGTT's vma_ops over to
 * the aliasing bind/unbind pair.  NOTE(review): gapped view — error labels
 * and the assignment of ggtt->alias are not visible here.
 */
412 static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
414 struct i915_vm_pt_stash stash = {};
415 struct i915_ppgtt *ppgtt;
418 ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
420 return PTR_ERR(ppgtt);
/* The alias must span at least the whole GGTT. */
422 if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
427 err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
/* Map the stashed page tables under the scratch object's lock. */
431 i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
432 err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
433 i915_gem_object_unlock(ppgtt->vm.scratch[0]);
438 * Note we only pre-allocate as far as the end of the global
439 * GTT. On 48b / 4-level page-tables, the difference is very,
440 * very significant! We have to preallocate as GVT/vgpu does
441 * not like the page directory disappearing.
443 ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
446 ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
448 GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
449 ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
451 GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
452 ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
454 i915_vm_free_pt_stash(&ppgtt->vm, &stash);
/* Error path: free the stash and drop the PPGTT reference. */
458 i915_vm_free_pt_stash(&ppgtt->vm, &stash);
460 i915_vm_put(&ppgtt->vm);
/*
 * fini_aliasing_ppgtt - tear down the aliasing PPGTT (if any) and restore
 * the plain GGTT bind/unbind vma_ops.
 */
464 static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
466 struct i915_ppgtt *ppgtt;
/* Atomically take ownership of the alias pointer; NULL if never created. */
468 ppgtt = fetch_and_zero(&ggtt->alias);
472 i915_vm_put(&ppgtt->vm);
474 ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
475 ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
/*
 * i915_init_ggtt - top-level GGTT space setup: init_ggtt() plus, for
 * aliasing-PPGTT platforms, init_aliasing_ppgtt() with unwind on failure.
 * NOTE(review): gapped view — the return statements are not visible here.
 */
478 int i915_init_ggtt(struct drm_i915_private *i915)
482 ret = init_ggtt(to_gt(i915)->ggtt);
486 if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
487 ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
489 cleanup_init_ggtt(to_gt(i915)->ggtt);
/*
 * ggtt_cleanup_hw - driver-unload teardown: unbind every remaining VMA,
 * drop the error-capture and GuC reservations, deballoon, run the vm's
 * cleanup hook and release the iomap/MTRR.  NOTE(review): gapped view —
 * handling around 'trylock' is not fully visible here.
 */
495 static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
497 struct i915_vma *vma, *vn;
/* Flush deferred work so no freed objects resurrect during teardown. */
499 flush_workqueue(ggtt->vm.i915->wq);
500 i915_gem_drain_freed_objects(ggtt->vm.i915);
502 mutex_lock(&ggtt->vm.mutex);
/* The device is gone: no point rewriting PTEs on each unbind. */
504 ggtt->vm.skip_pte_rewrite = true;
506 list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
507 struct drm_i915_gem_object *obj = vma->obj;
510 trylock = i915_gem_object_trylock(obj, NULL);
513 WARN_ON(__i915_vma_unbind(vma));
515 i915_gem_object_unlock(obj);
518 if (drm_mm_node_allocated(&ggtt->error_capture))
519 drm_mm_remove_node(&ggtt->error_capture);
520 mutex_destroy(&ggtt->error_mutex);
522 ggtt_release_guc_top(ggtt);
523 intel_vgt_deballoon(ggtt);
525 ggtt->vm.cleanup(&ggtt->vm);
527 mutex_unlock(&ggtt->vm.mutex);
528 i915_address_space_fini(&ggtt->vm);
530 arch_phys_wc_del(ggtt->mtrr);
532 if (ggtt->iomap.size)
533 io_mapping_fini(&ggtt->iomap);
537 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
/* @i915: device private.  Tears down the aliasing PPGTT, fence state and
 * then the GGTT hardware bookkeeping, in that order.
 */
540 void i915_ggtt_driver_release(struct drm_i915_private *i915)
542 struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
544 fini_aliasing_ppgtt(ggtt);
546 intel_ggtt_fini_fences(ggtt);
547 ggtt_cleanup_hw(ggtt);
551 * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
552 * all free objects have been drained.
/* @i915: device private.  Verifies we hold the last reservation-object
 * reference before finalising it.
 */
555 void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
557 struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
559 GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
560 dma_resv_fini(&ggtt->vm._resv);
/*
 * intel_pci_resource - describe PCI BAR @bar of @pdev as a memory resource
 * (start/len taken straight from the PCI config).
 */
563 struct resource intel_pci_resource(struct pci_dev *pdev, int bar)
565 return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
566 pci_resource_len(pdev, bar));
/*
 * ggtt_probe_hw - detect the GGTT for @gt via the generation-specific GMCH
 * probe, then sanity-clamp vm.total to 32 bits and mappable_end to vm.total,
 * logging the discovered sizes.  NOTE(review): gapped view — some error
 * returns and the lvalue of the min_t() at line 597 are not visible here.
 */
569 static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
571 struct drm_i915_private *i915 = gt->i915;
575 ggtt->vm.i915 = i915;
576 ggtt->vm.dma = i915->drm.dev;
577 dma_resv_init(&ggtt->vm._resv);
/* Pick the probe routine by graphics generation. */
579 if (GRAPHICS_VER(i915) <= 5)
580 ret = intel_gt_gmch_gen5_probe(ggtt);
581 else if (GRAPHICS_VER(i915) < 8)
582 ret = intel_gt_gmch_gen6_probe(ggtt);
584 ret = intel_gt_gmch_gen8_probe(ggtt);
586 dma_resv_fini(&ggtt->vm._resv);
/* The GGTT is expected to fit in 32 bits of address space; clamp it. */
590 if ((ggtt->vm.total - 1) >> 32) {
592 "We never expected a Global GTT with more than 32bits"
593 " of address space! Found %lldM!\n",
594 ggtt->vm.total >> 20);
595 ggtt->vm.total = 1ULL << 32;
597 min_t(u64, ggtt->mappable_end, ggtt->vm.total);
600 if (ggtt->mappable_end > ggtt->vm.total) {
602 "mappable aperture extends past end of GGTT,"
603 " aperture=%pa, total=%llx\n",
604 &ggtt->mappable_end, ggtt->vm.total);
605 ggtt->mappable_end = ggtt->vm.total;
608 /* GMADR is the PCI mmio aperture into the global GTT. */
609 drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
610 drm_dbg(&i915->drm, "GMADR size = %lluM\n",
611 (u64)ggtt->mappable_end >> 20);
612 drm_dbg(&i915->drm, "DSM size = %lluM\n",
613 (u64)resource_size(&intel_graphics_stolen_res) >> 20);
619 * i915_ggtt_probe_hw - Probe GGTT hardware location
/* @i915: device private.  Wraps ggtt_probe_hw() for the primary GT and logs
 * when VT-d is active for graphics access.  Gapped view — the return lines
 * are not visible here.
 */
622 int i915_ggtt_probe_hw(struct drm_i915_private *i915)
626 ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
630 if (i915_vtd_active(i915))
631 drm_info(&i915->drm, "VT-d active for gfx access\n");
/*
 * i915_ggtt_enable_hw - enable the GGTT hardware; forwards to the gen5 GMCH
 * enable helper (a no-op/passthrough path on later generations is handled
 * inside that helper — NOTE(review): not visible here, confirm).
 */
636 int i915_ggtt_enable_hw(struct drm_i915_private *i915)
638 return intel_gt_gmch_gen5_enable_hw(i915);
/*
 * i915_ggtt_enable_guc - switch the invalidate hook to the GuC-aware
 * variant (must currently be the plain gen8 hook) and flush once.
 */
641 void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
643 GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
645 ggtt->invalidate = guc_ggtt_invalidate;
/* Invalidate immediately so the new path takes effect. */
647 ggtt->invalidate(ggtt);
/*
 * i915_ggtt_disable_guc - revert the invalidate hook to the plain gen8
 * variant; tolerates being called when already reverted (error unload).
 */
650 void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
652 /* XXX Temporary pardon for error unload */
653 if (ggtt->invalidate == gen8_ggtt_invalidate)
656 /* We should only be called after i915_ggtt_enable_guc() */
657 GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
659 ggtt->invalidate = gen8_ggtt_invalidate;
/* Flush once through the restored path. */
661 ggtt->invalidate(ggtt);
665 * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
666 * @vm: The VM to restore the mappings for
668 * Restore the memory mappings for all objects mapped to HW via the GGTT or a
671 * Returns %true if restoring the mapping for any object that was in a write
672 * domain before suspend.
674 bool i915_ggtt_resume_vm(struct i915_address_space *vm)
676 struct i915_vma *vma;
677 bool write_domain_objs = false;
679 drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
681 /* First fill our portion of the GTT with scratch pages */
682 vm->clear_range(vm, 0, vm->total);
684 /* clflush objects bound into the GGTT and rebind them. */
685 list_for_each_entry(vma, &vm->bound_list, vm_link) {
686 struct drm_i915_gem_object *obj = vma->obj;
687 unsigned int was_bound =
688 atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
/* Everything on bound_list must already carry a bind flag. */
690 GEM_BUG_ON(!was_bound);
691 vma->ops->bind_vma(vm, NULL, vma->resource,
692 obj ? obj->cache_level : 0,
694 if (obj) { /* only used during resume => exclusive access */
695 write_domain_objs |= fetch_and_zero(&obj->write_domain);
696 obj->read_domains |= I915_GEM_DOMAIN_GTT;
700 return write_domain_objs;
703 void i915_ggtt_resume(struct i915_ggtt *ggtt)
707 intel_gt_check_and_clear_faults(ggtt->vm.gt);
709 flush = i915_ggtt_resume_vm(&ggtt->vm);
711 ggtt->invalidate(ggtt);
714 wbinvd_on_all_cpus();
716 if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
717 setup_private_pat(ggtt->vm.gt->uncore);
719 intel_ggtt_restore_fences(ggtt);