// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

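/*
 * Note on the two mapping helpers above: map_pt_dma() pins and maps the
 * object via i915_gem_object_pin_map_unlocked(), i.e. it takes and drops
 * the object lock itself, while map_pt_dma_locked() uses the plain
 * i915_gem_object_pin_map() and therefore expects the caller to already
 * hold the object's dma-resv / ww lock, e.g. from within an
 * i915_gem_ww_ctx transaction.
 */
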
void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
		return;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));

	mutex_unlock(&vm->mutex);
}

/* lock the vm into the current ww, if we lock one, we lock all */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

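/*
 * Illustrative caller sketch for i915_vm_lock_objects() above (the exact
 * surrounding code depends on the call site):
 *
 *	struct i915_gem_ww_ctx ww;
 *	int err;
 *
 *	i915_gem_ww_ctx_init(&ww, false);
 * retry:
 *	err = i915_vm_lock_objects(vm, &ww);
 *	if (!err) {
 *		// operate on the vm's paging structures
 *	}
 *	if (err == -EDEADLK) {
 *		err = i915_gem_ww_ctx_backoff(&ww);
 *		if (!err)
 *			goto retry;
 *	}
 *	i915_gem_ww_ctx_fini(&ww);
 *
 * Because every page-table object shares the vm's dma-resv (see
 * alloc_pt_lmem()/alloc_pt_dma()), this single lock covers them all.
 */
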
void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
	mutex_destroy(&vm->mutex);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock.
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

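/*
 * The final teardown is pushed onto the driver workqueue rather than run
 * inline from i915_vm_release(): __i915_vm_release() frees the paging
 * structures and may sleep, which is not guaranteed to be safe in every
 * context where the last i915_vm_put() can happen, so the heavy lifting
 * is deferred to vm->i915->wq.
 */
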
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	INIT_WORK(&vm->release_work, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV + BXT VT-d workarounds use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the annotation for this; we use trylock in the shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
}

void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	clflush_cache_range(vaddr, PAGE_SIZE);
}

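/*
 * fill_page_dma() stamps 'count' copies of a 64-bit value (typically a
 * scratch PTE/PDE encoding) into a paging structure and clflushes it so
 * the GPU page walker observes the update. Callers usually go through the
 * fill_px()/fill32_px() wrappers from intel_gtt.h, along the lines of:
 *
 *	fill_px(vm->scratch[1], vm->scratch[0]->encode);
 *
 * which points every entry of the level-1 scratch directory at the
 * scratch page.
 */
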
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
}

int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

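/*
 * vm->scratch[0] is the scratch page itself; scratch[1..vm->top] are the
 * scratch paging structures for the levels above it, set up by the
 * per-generation ggtt/ppgtt init code, hence the 0..top walk in
 * free_scratch() above.
 */
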
void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT-related workarounds. It is called on
	 * driver load and after a GPU reset, so you can place workarounds
	 * here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

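/*
 * GEN8_PPAT(i, x) packs an 8-bit PAT encoding into bits [i*8+7:i*8] of a
 * single u64, so all eight entries built above fit in one 64-bit value
 * that is written out as the GEN8_PRIVATE_PAT_LO/HI register pair
 * (entries 0-3 in LO, 4-7 in HI).
 */
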
static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

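/*
 * The _pinned variant above hands back the vma already pinned (PIN_GLOBAL
 * for a ggtt vma, PIN_USER otherwise); callers are expected to unpin it
 * with i915_vma_unpin() and release the backing object once the readback
 * is complete.
 */
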
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif