// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

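/*
 * Note on the two mapping helpers above: map_pt_dma() pins and maps the
 * object via i915_gem_object_pin_map_unlocked(), i.e. it takes and drops
 * the object lock itself, while map_pt_dma_locked() uses the plain
 * i915_gem_object_pin_map() and therefore expects the caller to already
 * hold the object's dma-resv / ww lock, e.g. from within an
 * i915_gem_ww_ctx transaction.
 */
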
void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
		return;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));

	mutex_unlock(&vm->mutex);
}

/* lock the vm into the current ww, if we lock one, we lock all */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

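/*
 * Illustrative caller sketch for i915_vm_lock_objects() above (the exact
 * surrounding code depends on the call site):
 *
 *	struct i915_gem_ww_ctx ww;
 *	int err;
 *
 *	i915_gem_ww_ctx_init(&ww, false);
 * retry:
 *	err = i915_vm_lock_objects(vm, &ww);
 *	if (!err) {
 *		// operate on the vm's paging structures
 *	}
 *	if (err == -EDEADLK) {
 *		err = i915_gem_ww_ctx_backoff(&ww);
 *		if (!err)
 *			goto retry;
 *	}
 *	i915_gem_ww_ctx_fini(&ww);
 *
 * Because every page-table object shares the vm's dma-resv (see
 * alloc_pt_lmem()/alloc_pt_dma()), this single lock covers them all.
 */
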
void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
	mutex_destroy(&vm->mutex);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock.
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

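/*
 * The final teardown is pushed onto the driver workqueue rather than run
 * inline from i915_vm_release(): __i915_vm_release() frees the paging
 * structures and may sleep, which is not guaranteed to be safe in every
 * context where the last i915_vm_put() can happen, so the heavy lifting
 * is deferred to vm->i915->wq.
 */
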
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	INIT_WORK(&vm->release_work, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV + BXT VT-d workarounds use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the annotation for this; we use trylock in the shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
}

void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	clflush_cache_range(vaddr, PAGE_SIZE);
}

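/*
 * fill_page_dma() stamps 'count' copies of a 64-bit value (typically a
 * scratch PTE/PDE encoding) into a paging structure and clflushes it so
 * the GPU page walker observes the update. Callers usually go through the
 * fill_px()/fill32_px() wrappers from intel_gtt.h, along the lines of:
 *
 *	fill_px(vm->scratch[1], vm->scratch[0]->encode);
 *
 * which points every entry of the level-1 scratch directory at the
 * scratch page.
 */
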
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
}

int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

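/*
 * vm->scratch[0] is the scratch page itself; scratch[1..vm->top] are the
 * scratch paging structures for the levels above it, set up by the
 * per-generation ggtt/ppgtt init code, hence the 0..top walk in
 * free_scratch() above.
 */
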
void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT-related workarounds. It is called on
	 * driver load and after a GPU reset, so you can place workarounds
	 * here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

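/*
 * GEN8_PPAT(i, x) packs an 8-bit PAT encoding into bits [i*8+7:i*8] of a
 * single u64, so all eight entries built above fit in one 64-bit value
 * that is written out as the GEN8_PRIVATE_PAT_LO/HI register pair
 * (entries 0-3 in LO, 4-7 in HI).
 */
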
static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

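/*
 * The _pinned variant above hands back the vma already pinned (PIN_GLOBAL
 * for a ggtt vma, PIN_USER otherwise); callers are expected to unpin it
 * with i915_vma_unpin() and release the backing object once the readback
 * is complete.
 */
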
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif