// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/sched/mm.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"

bool i915_ggtt_require_binder(struct drm_i915_private *i915)
{
	/* Wa_13010847436 & Wa_14019519902 */
	return !i915_direct_stolen_access(i915) &&
	       MEDIA_VER_FULL(i915) == IP_VER(13, 0);
}

static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
	return IS_BROXTON(i915) && i915_vtd_active(i915);
}

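/*
 * CHV, and BXT with VT-d active, cannot tolerate concurrent GGTT updates;
 * see the stop_machine() based workaround described in
 * i915_address_space_init() below.
 */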
bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
{
	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
}

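/*
 * Allocate a GEM object in device local memory (LMEM) to back GTT paging
 * structures.
 */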
struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * use the passed-in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

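/* As alloc_pt_lmem(), but backed by a system memory (internal) object. */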
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

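/*
 * Pin and kernel-map the backing store of a paging-structure object.
 * This variant takes the object lock internally; use map_pt_dma_locked()
 * when the lock is already held.
 */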
int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

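/* As map_pt_dma(), but the caller must already hold the object lock. */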
int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

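/*
 * Unbind and drop every vma on @list, coping with objects that are
 * already on their way to destruction.
 */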
static void clear_vm_list(struct list_head *list)
{
	struct i915_vma *vma, *vn;

	list_for_each_entry_safe(vma, vn, list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_gem_object_get_rcu(obj)) {
			/*
			 * Object is dying, but has not yet cleared its
			 * vma list. Unbind the dying vma to ensure our
			 * list is completely drained. We leave the
			 * destruction to the object destructor to avoid
			 * the vma disappearing under it.
			 */
			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
			WARN_ON(__i915_vma_unbind(vma));

			/* Remove from the unbound list */
			list_del_init(&vma->vm_link);

			/*
			 * Delay the vm and vm mutex freeing until the
			 * object is done with destruction.
			 */
			i915_vm_resv_get(vma->vm);
			vma->vm_ddestroy = true;
		} else {
			i915_vma_destroy_locked(vma);
			i915_gem_object_put(obj);
		}
	}
}

static void __i915_vm_close(struct i915_address_space *vm)
{
	mutex_lock(&vm->mutex);

	clear_vm_list(&vm->bound_list);
	clear_vm_list(&vm->unbound_list);

	/* Check for must-fix unanticipated side-effects */
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	GEM_BUG_ON(!list_empty(&vm->unbound_list));

	mutex_unlock(&vm->mutex);
}

/*
 * Lock the vm into the current ww transaction; since all paging structures
 * share one dma-resv, locking one object locks them all.
 */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock, and also when a racing vma destruction
 * has delayed the final vm freeing (see clear_vm_list()).
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	mutex_destroy(&vm->mutex);

	kfree(vm);
}

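/* Deferred teardown, run from a worker once the last vm reference is dropped. */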
static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	__i915_vm_close(vm);

	/* Synchronize async unbinds. */
	i915_vma_resource_bind_dep_sync_all(vm);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

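/*
 * Common initialisation shared by the GGTT and ppGTT flavours of
 * struct i915_address_space: reference counts, locking, the drm_mm
 * range manager and the vma lists.
 */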
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	vm->pending_unbind = RB_ROOT_CACHED;
	INIT_WORK(&vm->release_work, __i915_vm_release);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV and BXT VT-d workarounds use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the annotation for this; we use trylock in the shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);

	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
		 ARRAY_SIZE(vm->min_alignment));

	if (HAS_64K_PAGES(vm->i915)) {
		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
	}

	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
	INIT_LIST_HEAD(&vm->unbound_list);
}

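/*
 * The __px_*() helpers look up the CPU mapping, DMA address and backing
 * page of a pinned paging-structure object.
 */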
void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

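/*
 * Write @count copies of the 64b value @val to the start of @p's mapping,
 * flushing the CPU cache so the GPU observes the update.
 */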
void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);
}

static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}

int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
	    !HAS_64K_PAGES(vm->i915))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!vm->scratch[0])
		return;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. This function is
	 * called on driver load and after a GPU reset, so you can place
	 * workarounds here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		gt_WARN_ON_ONCE(gt, can_use_gtt_cache &&
				intel_uncore_read(uncore,
						  HSW_GTT_CACHE_EN) == 0);
	}
}

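/*
 * The MTL PAT entries combine an L4 cache mode (WB/WT/UC) with a coherency
 * mode; the media GT programs them via plain MMIO, while the graphics GT
 * below uses multicast MCR writes.
 */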
static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
			   MTL_PPAT_L4_0_WB);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
			   MTL_PPAT_L4_1_WT);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
			   MTL_PPAT_L4_3_UC);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void xelpg_setup_private_ppat(struct intel_gt *gt)
{
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
				     MTL_PPAT_L4_0_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
				     MTL_PPAT_L4_1_WT);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
				     MTL_PPAT_L4_3_UC);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
				     MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
				     MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

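/*
 * On Xe_HP the PAT registers are multicast (MCR) registers, and the raw
 * _fw accessors additionally require forcewake and the MCR lock to be
 * held explicitly.
 */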
static void xehp_setup_private_ppat(struct intel_gt *gt)
{
	enum forcewake_domains fw;
	unsigned long flags;

	fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg),
					    FW_REG_WRITE);
	intel_uncore_forcewake_get(gt->uncore, fw);

	intel_gt_mcr_lock(gt, &flags);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
	intel_gt_mcr_unlock(gt, flags);

	intel_uncore_forcewake_put(gt->uncore, fw);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

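/* Program the private PAT flavour appropriate for this GT. */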
void setup_private_pat(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	struct drm_i915_private *i915 = gt->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (gt->type == GT_MEDIA) {
		xelpmp_setup_private_ppat(gt->uncore);
		return;
	}

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		xelpg_setup_private_ppat(gt);
	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
		xehp_setup_private_ppat(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

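/*
 * Create an internal object of at least @size bytes and a vma for it in
 * @vm, through which results can be read back; the _pinned variant below
 * additionally pins the vma into the GTT.
 */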
struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif