// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <asm/set_memory.h>
#include <asm/smp.h>
#include <linux/types.h>
#include <linux/stop_machine.h>

#include <drm/drm_managed.h>
#include <drm/i915_drm.h>
#include <drm/intel-gtt.h>

#include "display/intel_display.h"
#include "gem/i915_gem_lmem.h"

#include "intel_ggtt_gmch.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_pci_config.h"
#include "i915_drv.h"
#include "i915_pci.h"
#include "i915_scatterlist.h"
#include "i915_utils.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"
#include "gen8_ppgtt.h"

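/*
 * drm_mm "color" callback for the GGTT. Nodes whose cache colour differs
 * from a neighbour are padded by one GTT page so that objects with
 * conflicting cacheability never share a boundary page. Only installed
 * by ggtt_init_hw() when snooping cannot be controlled per PTE
 * (!HAS_LLC && !HAS_PPGTT).
 */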
static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
				   unsigned long color,
				   u64 *start,
				   u64 *end)
{
	if (i915_node_color_differs(node, color))
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

	ggtt->vm.is_ggtt = true;

	/* Only VLV supports read-only GGTT mappings */
	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

	if (ggtt->mappable_end) {
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
	}

	intel_ggtt_init_fences(ggtt);

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
	int ret;

	/*
	 * Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(to_gt(i915)->ggtt);
	if (ret)
		return ret;

	return 0;
}

/**
 * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
 * @vm: The VM to suspend the mappings for
 *
 * Suspend the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 */
void i915_ggtt_suspend_vm(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;
	int save_skip_rewrite;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

retry:
	i915_gem_drain_freed_objects(vm->i915);

	mutex_lock(&vm->mutex);

	/*
	 * Skip rewriting PTE on VMA unbind.
	 * FIXME: Use an argument to i915_vma_unbind() instead?
	 */
	save_skip_rewrite = vm->skip_pte_rewrite;
	vm->skip_pte_rewrite = true;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));

		if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
			continue;

		/* unlikely to race when GPU is idle, so no worry about slowpath.. */
		if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
			/*
			 * No dead objects should appear here, GPU should be
			 * completely idle, and userspace suspended
			 */
			i915_gem_object_get(obj);

			mutex_unlock(&vm->mutex);

			i915_gem_object_lock(obj, NULL);
			GEM_WARN_ON(i915_vma_unbind(vma));
			i915_gem_object_unlock(obj);
			i915_gem_object_put(obj);

			vm->skip_pte_rewrite = save_skip_rewrite;
			goto retry;
		}

		if (!i915_vma_is_pinned(vma)) {
			i915_vma_wait_for_bind(vma);

			__i915_vma_evict(vma, false);
			drm_mm_remove_node(&vma->node);
		}

		i915_gem_object_unlock(obj);
	}

	vm->clear_range(vm, 0, vm->total);

	vm->skip_pte_rewrite = save_skip_rewrite;

	mutex_unlock(&vm->mutex);
}

void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;

	i915_ggtt_suspend_vm(&ggtt->vm);
	ggtt->invalidate(ggtt);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);
}

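/*
 * ggtt->invalidate() flavours. PTE updates are written through the GSM
 * mmio window; afterwards the chipset write buffers must be flushed and
 * the GGTT TLB invalidated before the new mappings are guaranteed to be
 * visible to the GPU.
 */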
static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

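/*
 * When the GuC is in use it keeps its own TLB for GGTT lookups, so it
 * must be invalidated alongside the GT path above: gen12+ exposes a
 * per-GT GEN12_GUC_TLB_INV_CR register, older parts use GEN8_GTCR.
 */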
static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	gen8_ggtt_invalidate(ggtt);

	if (GRAPHICS_VER(i915) >= 12) {
		struct intel_gt *gt;

		list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
			intel_uncore_write_fw(gt->uncore,
					      GEN12_GUC_TLB_INV_CR,
					      GEN12_GUC_TLB_INV_CR_INVALIDATE);
	} else {
		intel_uncore_write_fw(ggtt->vm.gt->uncore,
				      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
	}
}

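/*
 * On MTL the GGTT PTE encodes the two low bits of the PAT index
 * (PAT0/PAT1) alongside the local-memory bit; higher bits of the
 * index are ignored here.
 */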
static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
			       unsigned int pat_index,
			       u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;

	WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	if (pat_index & BIT(0))
		pte |= MTL_GGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= MTL_GGTT_PTE_PAT1;

	return pte;
}

u64 gen8_ggtt_pte_encode(dma_addr_t addr,
			 unsigned int pat_index,
			 u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	return pte;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}

static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  unsigned int pat_index,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags));

	ggtt->invalidate(ggtt);
}

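/*
 * Write out the PTEs for an entire vma resource. The reserved node is
 * laid out as [guard | pages | unused tail]: the guard pages in front
 * and any allocated-but-unused space behind the buffer are pointed at
 * scratch, and a single TLB invalidation is issued once all PTE writes
 * have landed.
 */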
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma_resource *vma_res,
				     unsigned int pat_index,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
	gen8_pte_t __iomem *gte;
	gen8_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
	 */

	gte = (gen8_pte_t __iomem *)ggtt->gsm;
	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  unsigned int pat_index,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	iowrite32(vm->pte_encode(addr, pat_index, flags), pte);

	ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global gtt with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma_resource *vma_res,
				     unsigned int pat_index,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *gte;
	gen6_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	gte = (gen6_pte_t __iomem *)ggtt->gsm;
	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;

	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		iowrite32(vm->pte_encode(addr, pat_index, flags), gte++);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}

static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
	/*
	 * Make sure the internal GAM fifo has been cleared of all GTT
	 * writes before exiting stop_machine(). This guarantees that
	 * any aperture accesses waiting to start in another process
	 * cannot back up behind the GTT writes causing a hang.
	 * The register can be any arbitrary GAM register.
	 */
	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}

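/*
 * stop_machine() passes a single opaque argument to its callback, so the
 * parameters of the insert paths are bundled into these small structs
 * and unpacked again in the __cb helpers (the BXT VT-d serialization
 * workaround below).
 */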
struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	unsigned int pat_index;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
			      arg->pat_index, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  unsigned int pat_index,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, pat_index };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma_resource *vma_res;
	unsigned int pat_index;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
				 arg->pat_index, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma_resource *vma_res,
					     unsigned int pat_index,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma_res, pat_index, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0]->encode;
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}

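/*
 * Default GGTT bind/unbind ops. Binding is a no-op if the requested
 * flags are already present; otherwise the vma resource attributes
 * (read-only, local memory) are translated into PTE flags and the
 * entries written through vm->insert_entries().
 */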
void intel_ggtt_bind_vma(struct i915_address_space *vm,
			 struct i915_vm_pt_stash *stash,
			 struct i915_vma_resource *vma_res,
			 unsigned int pat_index,
			 u32 flags)
{
	u32 pte_flags;

	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
		return;

	vma_res->bound_flags |= flags;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;
	if (vma_res->bi.lmem)
		pte_flags |= PTE_LM;

	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}

void intel_ggtt_unbind_vma(struct i915_address_space *vm,
			   struct i915_vma_resource *vma_res)
{
	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}

/*
 * Reserve the top of the GuC address space for firmware images. Addresses
 * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
 * which makes for a suitable range to hold GuC/HuC firmware images if the
 * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
 * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
 * of the same size anyway, which is far more than needed, to keep the logic
 * in uc_fw_ggtt_offset() simple.
 */
#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 offset;
	int ret;

	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
	offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;

	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
				   GUC_TOP_RESERVE_SIZE, offset,
				   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
	if (ret)
		drm_dbg(&ggtt->vm.i915->drm,
			"Failed to reserve top of GGTT for GuC\n");

	return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
}

static int init_ggtt(struct i915_ggtt *ggtt)
{
	/*
	 * Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.gt->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	mutex_init(&ggtt->error_mutex);
	if (ggtt->mappable_end) {
		/*
		 * Reserve a mappable slot for our lockless error capture.
		 *
		 * We strongly prefer taking address 0x0 in order to protect
		 * other critical buffers against accidental overwrites,
		 * as writing to address 0 is a very common mistake.
		 *
		 * Since 0 may already be in use by the system (e.g. the BIOS
		 * framebuffer), we let the reservation fail quietly and hope
		 * 0 remains reserved always.
		 *
		 * If we fail to reserve 0, and then fail to find any space
		 * for an error-capture, remain silent. We can afford not
		 * to reserve an error_capture node as we have fallback
		 * paths, and we trust that 0 will remain reserved. However,
		 * the only likely reason for failure to insert is a driver
		 * bug, which we expect to cause other failures...
		 *
		 * Since CPU can perform speculative reads on error capture
		 * (write-combining allows it) add scratch page after error
		 * capture to avoid DMAR errors.
		 */
		ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE;
		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
			drm_mm_insert_node_in_range(&ggtt->vm.mm,
						    &ggtt->error_capture,
						    ggtt->error_capture.size, 0,
						    ggtt->error_capture.color,
						    0, ggtt->mappable_end,
						    DRM_MM_INSERT_LOW);
	}
	if (drm_mm_node_allocated(&ggtt->error_capture)) {
		u64 start = ggtt->error_capture.start;
		u64 size = ggtt->error_capture.size;

		ggtt->vm.scratch_range(&ggtt->vm, start, size);
		drm_dbg(&ggtt->vm.i915->drm,
			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
			start, start + size);
	}

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		drm_dbg(&ggtt->vm.i915->drm,
			"clearing unused GTT space: [%lx, %lx]\n",
			hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}

static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
				  struct i915_vm_pt_stash *stash,
				  struct i915_vma_resource *vma_res,
				  unsigned int pat_index,
				  u32 flags)
{
	u32 pte_flags;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND)
		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
			       stash, vma_res, pat_index, flags);

	if (flags & I915_VMA_GLOBAL_BIND)
		vm->insert_entries(vm, vma_res, pat_index, pte_flags);

	vma_res->bound_flags |= flags;
}

static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
				    struct i915_vma_resource *vma_res)
{
	if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
		vm->clear_range(vm, vma_res->start, vma_res->vma_size);

	if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
}

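/*
 * An aliasing PPGTT shadows the whole of the GGTT behind a single set of
 * page tables. The page tables for the full GGTT range are allocated and
 * pinned up front, since GVT/vgpu does not tolerate the page directory
 * disappearing, and the GGTT vma ops are then swapped for the aliasing
 * variants above.
 */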
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_vm_pt_stash stash = {};
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
	i915_gem_object_unlock(ppgtt->vm.scratch[0]);
	if (err)
		goto err_stash;

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);

	ggtt->alias = ppgtt;
	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
	return 0;

err_stash:
	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}

static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		return;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
}

int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(to_gt(i915)->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
		if (ret)
			cleanup_init_ggtt(to_gt(i915)->ggtt);
	}

	return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;

	flush_workqueue(ggtt->vm.i915->wq);
	i915_gem_drain_freed_objects(ggtt->vm.i915);

	mutex_lock(&ggtt->vm.mutex);

	ggtt->vm.skip_pte_rewrite = true;

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		bool trylock;

		trylock = i915_gem_object_trylock(obj, NULL);
		WARN_ON(!trylock);

		WARN_ON(__i915_vma_unbind(vma));
		if (trylock)
			i915_gem_object_unlock(obj);
	}

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);

	ggtt_release_guc_top(ggtt);
	intel_vgt_deballoon(ggtt);

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;

	fini_aliasing_ppgtt(ggtt);

	intel_ggtt_fini_fences(ggtt);
	ggtt_cleanup_hw(ggtt);
}

/**
 * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
 * all free objects have been drained.
 * @i915: i915 device
 */
void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;

	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
	dma_resv_fini(&ggtt->vm._resv);
}

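/*
 * Helpers to decode the GGTT size (in bytes) from the GGMS field of the
 * GMCH graphics control word in PCI config space; the field is packed
 * differently on snb, bdw and chv.
 */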
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
{
	/*
	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
	 */
	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
}

static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
{
	return gen6_gttmmadr_size(i915) / 2;
}

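/*
 * Common tail of the gen6/gen8 probes: map the GSM (the upper half of
 * the GTTMMADR BAR, which holds the PTEs) and set up the scratch page
 * that unused GGTT entries are pointed at.
 */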
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	phys_addr_t phys_addr;
	u32 pte_flags;
	int ret;

	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
	phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);

	/*
	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
		ggtt->gsm = ioremap(phys_addr, size);
	else
		ggtt->gsm = ioremap_wc(phys_addr, size);
	if (!ggtt->gsm) {
		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	kref_init(&ggtt->vm.resv_ref);
	ret = setup_scratch_page(&ggtt->vm);
	if (ret) {
		drm_err(&i915->drm, "Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
		return ret;
	}

	pte_flags = 0;
	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
		pte_flags |= PTE_LM;

	ggtt->vm.scratch[0]->encode =
		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
				    i915_gem_get_pat_index(i915,
							   I915_CACHE_NONE),
				    pte_flags);

	return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	iounmap(ggtt->gsm);
	free_scratch(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
	return DEFINE_RES_MEM(pci_resource_start(pdev, bar),
			      pci_resource_len(pdev, bar));
}

static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
		if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
			return -ENXIO;

		ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	ggtt->vm.scratch_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/*
	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
	 * and always on CHV.
	 */
	if (intel_vm_no_concurrent_access_wa(i915)) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;

		/*
		 * Calling stop_machine() version of GGTT update function
		 * at error capture/reset path will raise lockdep warning.
		 * Allow calling gen8_ggtt_insert_* directly at reset path
		 * which is safe from parallel GGTT updates.
		 */
		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
		ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;

		ggtt->vm.bind_async_flags =
			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
	}

	if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
		ggtt->invalidate = guc_ggtt_invalidate;
	else
		ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
	else
		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;

	return ggtt_probe_common(ggtt, size);
}

/*
 * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
 * so the switch-case statements in these PTE encode functions are still valid.
 * See translation table LEGACY_CACHELEVEL.
 */
static u64 snb_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(pat_index);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(pat_index);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (pat_index != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (pat_index != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   unsigned int pat_index,
			   u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
		return -ENXIO;

	ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/*
	 * 64/512MB is the current min/max we actually know of, but this is
	 * just a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64 << 20) ||
	    ggtt->mappable_end > (512 << 20)) {
		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
			&ggtt->mappable_end);
		return -ENXIO;
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(i915))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.scratch_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(i915))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(i915))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(i915))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (GRAPHICS_VER(i915) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;

	return ggtt_probe_common(ggtt, size);
}

static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
	ggtt->vm.dma = i915->drm.dev;
	dma_resv_init(&ggtt->vm._resv);

	if (GRAPHICS_VER(i915) >= 8)
		ret = gen8_gmch_probe(ggtt);
	else if (GRAPHICS_VER(i915) >= 6)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = intel_ggtt_gmch_probe(ggtt);

	if (ret) {
		dma_resv_fini(&ggtt->vm._resv);
		return ret;
	}

	if ((ggtt->vm.total - 1) >> 32) {
		drm_err(&i915->drm,
			"We never expected a Global GTT with more than 32bits"
			" of address space! Found %lldM!\n",
			ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		drm_err(&i915->drm,
			"mappable aperture extends past end of GGTT,"
			" aperture=%pa, total=%llx\n",
			&ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
		(u64)ggtt->mappable_end >> 20);
	drm_dbg(&i915->drm, "DSM size = %lluM\n",
		(u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	int ret, i;

	for_each_gt(gt, i915, i) {
		ret = intel_gt_assign_ggtt(gt);
		if (ret)
			return ret;
	}

	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
	if (ret)
		return ret;

	if (i915_vtd_active(i915))
		drm_info(&i915->drm, "VT-d active for gfx access\n");

	return 0;
}

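/*
 * The ggtt struct itself is drm-managed: it is allocated with
 * drmm_kzalloc() and therefore freed automatically with the drm device,
 * while gt_list tracks every GT sharing this GGTT.
 */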
struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt;

	ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL);
	if (!ggtt)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ggtt->gt_list);

	return ggtt;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (GRAPHICS_VER(i915) < 6)
		return intel_ggtt_gmch_enable_hw(i915);

	return 0;
}

/**
 * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
 * @vm: The VM to restore the mappings for
 *
 * Restore the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 *
 * Returns %true if restoring the mapping for any object that was in a write
 * domain before suspend.
 */
bool i915_ggtt_resume_vm(struct i915_address_space *vm)
{
	struct i915_vma *vma;
	bool write_domain_objs = false;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

	/* First fill our portion of the GTT with scratch pages */
	vm->clear_range(vm, 0, vm->total);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		unsigned int was_bound =
			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;

		GEM_BUG_ON(!was_bound);

		/*
		 * Clear the bound flags of the vma resource to allow
		 * ptes to be repopulated.
		 */
		vma->resource->bound_flags = 0;
		vma->ops->bind_vma(vm, NULL, vma->resource,
				   obj ? obj->pat_index :
					 i915_gem_get_pat_index(vm->i915,
								I915_CACHE_NONE),
				   was_bound);

		if (obj) { /* only used during resume => exclusive access */
			write_domain_objs |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	return write_domain_objs;
}

void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;
	bool flush;

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);

	flush = i915_ggtt_resume_vm(&ggtt->vm);

	if (drm_mm_node_allocated(&ggtt->error_capture))
		ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
				       ggtt->error_capture.size);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_uc_resume_mappings(&gt->uc);

	ggtt->invalidate(ggtt);

	if (flush)
		wbinvd_on_all_cpus();

	intel_ggtt_restore_fences(ggtt);
}