// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/sched/mm.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"

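/*
 * GGTT update workarounds: Broxton with VT-d active (and Cherryview
 * unconditionally) cannot update GTT entries concurrently with GPU access,
 * so callers of intel_vm_no_concurrent_access_wa() are expected to serialise
 * their updates (see the stop_machine() note in i915_address_space_init()).
 */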
static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
	return IS_BROXTON(i915) && i915_vtd_active(i915);
}

bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
{
	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
}

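/* Allocate page-table backing store from device local memory (LMEM). */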
struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

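/*
 * Allocate page-table backing store from an internal system-memory object;
 * selftests can inject a failure here to force a full shrink first.
 */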
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

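/*
 * Pin and kmap a page-table object for CPU access; this variant is for
 * callers that do not already hold the object lock. The object is also
 * marked unshrinkable while mapped.
 */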
int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

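/* As map_pt_dma(), but for callers that already hold the object lock. */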
int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

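/*
 * Unbind and release every vma on @list. Objects that are already being
 * destroyed only have their vma unbound here; freeing of the vm itself is
 * deferred until the object destructor has finished with the vma.
 */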
static void clear_vm_list(struct list_head *list)
{
	struct i915_vma *vma, *vn;

	list_for_each_entry_safe(vma, vn, list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_gem_object_get_rcu(obj)) {
			/*
			 * Object is dying, but has not yet cleared its
			 * vma list.
			 * Unbind the dying vma to ensure our list
			 * is completely drained. We leave the destruction to
			 * the object destructor to avoid the vma
			 * disappearing under it.
			 */
			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
			WARN_ON(__i915_vma_unbind(vma));

			/* Remove from the unbound list */
			list_del_init(&vma->vm_link);

			/*
			 * Delay the vm and vm mutex freeing until the
			 * object is done with destruction.
			 */
			i915_vm_resv_get(vma->vm);
			vma->vm_ddestroy = true;
		} else {
			i915_vma_destroy_locked(vma);
			i915_gem_object_put(obj);
		}
	}
}

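/* Drain both the bound and unbound vma lists under the vm mutex. */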
static void __i915_vm_close(struct i915_address_space *vm)
{
	mutex_lock(&vm->mutex);

	clear_vm_list(&vm->bound_list);
	clear_vm_list(&vm->unbound_list);

	/* Check for must-fix unanticipated side-effects */
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	GEM_BUG_ON(!list_empty(&vm->unbound_list));

	mutex_unlock(&vm->mutex);
}

/* lock the vm into the current ww, if we lock one, we lock all */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

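/* Tear down the drm_mm range manager backing this address space. */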
void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock, and also if we raced when
 * destroying a vma by the vma destruction
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	mutex_destroy(&vm->mutex);

	kfree(vm);
}

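/*
 * Deferred (workqueue) part of address-space destruction: close the vm,
 * wait for async unbinds, run the per-vm cleanup, and drop the vm's
 * reference on the shared dma-resv, which frees the vm once the last
 * sharer is gone.
 */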
static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	__i915_vm_close(vm);

	/* Synchronize async unbinds. */
	i915_vma_resource_bind_dep_sync_all(vm);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

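/*
 * Final kref release for an address space: destruction is deferred to the
 * driver workqueue. The GGTT never takes this path (see the GEM_BUG_ON).
 */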
void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

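/*
 * Common address-space setup: reference counts, the release worker, the
 * reclaim-safe vm mutex, the shared dma-resv, the drm_mm range manager and
 * the per-region minimum-alignment table.
 */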
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	vm->pending_unbind = RB_ROOT_CACHED;
	INIT_WORK(&vm->release_work, __i915_vm_release);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * CHV + BXT VTD workaround use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the annotation for this, we use trylock in shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);

	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
		 ARRAY_SIZE(vm->min_alignment));

	if (HAS_64K_PAGES(vm->i915)) {
		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
	}

	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
	INIT_LIST_HEAD(&vm->unbound_list);
}

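/*
 * The __px_*() helpers return the CPU mapping, DMA address and struct page
 * of a page-table object; they all require the object's pages to be present.
 */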
void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

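/*
 * Fill a page-table page with @count copies of @val and flush the CPU
 * cache for the whole page.
 */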
void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);
}

static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}

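/*
 * Allocate the shared scratch page for @vm, preferring a 64K page where the
 * platform needs it and falling back to a single 4K page on failure.
 */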
int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
	    !HAS_64K_PAGES(vm->i915))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

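/* Release the scratch page and every scratch page-table level, if allocated. */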
void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!vm->scratch[0])
		return;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. This function is
	 * called on driver load and after a GPU reset, so you can place
	 * workarounds here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}

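/*
 * Per-platform private PPAT programming. Each helper below writes the full
 * set of eight PAT entries for its generation; the caller is
 * setup_private_pat().
 */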
static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void xehp_setup_private_ppat(struct intel_gt *gt)
{
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

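/* Pick and run the PPAT programming routine matching the graphics version. */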
void setup_private_pat(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	struct drm_i915_private *i915 = gt->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		xehp_setup_private_ppat(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

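/*
 * Create an internal object wrapped in a vma, suitable as a CPU-cached
 * scratch buffer for reading back from @vm; the _pinned variant below also
 * pins it into the GGTT or PPGTT as appropriate.
 */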
struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif