// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

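/*
 * Allocate the backing store for a page-table level. Under selftests,
 * fault injection here simulates severe memory pressure by purging all
 * shrinkable objects before attempting the real allocation.
 */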
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	return i915_gem_object_create_internal(vm->i915, sz);
}

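/*
 * Pin the backing pages of a page-table object and mark it unshrinkable,
 * as the GPU page-walker may reference it at any time.
 */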
int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	int err;

	err = i915_gem_object_pin_pages(obj);
	if (err)
		return err;

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

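/*
 * Called as each open handle on the address space is dropped: when the
 * open count reaches zero (taking vm->mutex atomically with the final
 * decrement), forcibly unbind and release every vma still bound here.
 */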
void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
		return;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));

	mutex_unlock(&vm->mutex);
}

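/* Tear down the address-space bookkeeping: the range manager and mutex. */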
void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
	mutex_destroy(&vm->mutex);
}

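/*
 * Deferred worker performing the final release: running from a worker
 * lets the backend cleanup sleep, and queueing via RCU delays teardown
 * until after a grace period for any lockless vm lookups in flight.
 */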
static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, rcu.work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	kfree(vm);
}

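/*
 * Last reference to the address space has been dropped. Only a ppgtt is
 * released this way (the ggtt lives and dies with the device); actual
 * teardown happens in __i915_vm_release() after an RCU grace period.
 */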
void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_rcu_work(vm->i915->wq, &vm->rcu);
}

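/*
 * Common construction shared by ggtt and ppgtt: reference counting, the
 * reclaim-tainted vm->mutex and a drm_mm range manager spanning
 * [0, vm->total).
 */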
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);
	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);
	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
}

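/*
 * Release the vma's view of its backing store. Remapped and partial
 * vmas own a private sg_table distinct from the object's, which must
 * be freed here; the object's own pages are untouched.
 */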
void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

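/*
 * Page-table objects are expected to be backed by a single contiguous
 * chunk (a 4K page, or a suitably aligned 64K scratch), so the head of
 * the sg_table describes the whole object.
 */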
dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

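/*
 * Fill a page-table page with @count copies of the 64-bit entry @val,
 * flushing the CPU cache since the GPU page-walker may not snoop it.
 */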
void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	struct page *page = __px_page(p);
	void *vaddr;

	vaddr = kmap(page);
	memset64(vaddr, val, count);
	clflush_cache_range(vaddr, PAGE_SIZE);
	kunmap(page);
}

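/*
 * See the comment in setup_scratch_page() for why scratch is poisoned
 * under CONFIG_DRM_I915_DEBUG_GEM and kept zeroed otherwise.
 */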
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	struct sgt_iter sgt;
	struct page *page;
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	for_each_sgt_page(page, sgt, scratch->mm.pages) {
		void *vaddr;

		vaddr = kmap(page);
		memset(vaddr, val, PAGE_SIZE);
		kunmap(page);
	}
}

int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (pin_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

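/*
 * Drop the scratch page plus the scratch page-directories stacked above
 * it, one for each level of the page-table tree.
 */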
void free_scratch(struct i915_address_space *vm)
{
	int i;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT-related workarounds. It is called on
	 * driver load and after a GPU reset, so you can place workarounds
	 * here even if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced on BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    INTEL_GEN(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GEN_RANGE(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec, if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages without disabling
		 * the GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

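/*
 * Gen10 programs each PAT entry through an individual register, rather
 * than the packed lo/hi pair used by gen8/gen9 below.
 */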
static void cnl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (INTEL_GEN(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV; the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

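/*
 * Choose and program the PPAT layout for the platform generation;
 * pre-gen8 parts have no private PAT, hence the GEM_BUG_ON.
 */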
void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(INTEL_GEN(i915) < 8);

	if (INTEL_GEN(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (INTEL_GEN(i915) >= 10)
		cnl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif