Commit | Line | Data |
---|---|---|
2c86e55d MA |
1 | // SPDX-License-Identifier: MIT |
2 | /* | |
3 | * Copyright © 2020 Intel Corporation | |
4 | */ | |
5 | ||
6 | #include <linux/log2.h> | |
7 | ||
8 | #include "gen6_ppgtt.h" | |
9 | #include "i915_scatterlist.h" | |
10 | #include "i915_trace.h" | |
11 | #include "i915_vgpu.h" | |
12 | #include "intel_gt.h" | |
13 | ||
14 | /* Write pde (index) from the page directory @pd to the page table @pt */ | |
9834dfef CW |
15 | static void gen6_write_pde(const struct gen6_ppgtt *ppgtt, |
16 | const unsigned int pde, | |
17 | const struct i915_page_table *pt) | |
2c86e55d | 18 | { |
89351925 CW |
19 | dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]); |
20 | ||
2c86e55d | 21 | /* Caller needs to make sure the write completes if necessary */ |
89351925 | 22 | iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID, |
2c86e55d MA |
23 | ppgtt->pd_addr + pde); |
24 | } | |
25 | ||
26 | void gen7_ppgtt_enable(struct intel_gt *gt) | |
27 | { | |
28 | struct drm_i915_private *i915 = gt->i915; | |
29 | struct intel_uncore *uncore = gt->uncore; | |
2c86e55d MA |
30 | u32 ecochk; |
31 | ||
32 | intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B); | |
33 | ||
34 | ecochk = intel_uncore_read(uncore, GAM_ECOCHK); | |
35 | if (IS_HASWELL(i915)) { | |
36 | ecochk |= ECOCHK_PPGTT_WB_HSW; | |
37 | } else { | |
38 | ecochk |= ECOCHK_PPGTT_LLC_IVB; | |
39 | ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; | |
40 | } | |
41 | intel_uncore_write(uncore, GAM_ECOCHK, ecochk); | |
2c86e55d MA |
42 | } |
43 | ||
44 | void gen6_ppgtt_enable(struct intel_gt *gt) | |
45 | { | |
46 | struct intel_uncore *uncore = gt->uncore; | |
47 | ||
48 | intel_uncore_rmw(uncore, | |
49 | GAC_ECO_BITS, | |
50 | 0, | |
51 | ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B); | |
52 | ||
53 | intel_uncore_rmw(uncore, | |
54 | GAB_CTL, | |
55 | 0, | |
56 | GAB_CTL_CONT_AFTER_PAGEFAULT); | |
57 | ||
58 | intel_uncore_rmw(uncore, | |
59 | GAM_ECOCHK, | |
60 | 0, | |
61 | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); | |
62 | ||
63 | if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */ | |
64 | intel_uncore_write(uncore, | |
65 | GFX_MODE, | |
66 | _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); | |
67 | } | |
68 | ||
/* PPGTT support for Sandybridge/Gen6 and later */
/*
 * Reset every PTE in [start, start + length) back to the scratch page.
 * Also decrements each page table's used-entry count; a table that drops
 * to zero is flagged for reaping at the next PD unbind.
 */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   u64 start, u64 length)
{
	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	/* Pre-encoded PTE pointing at the scratch page */
	const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
	unsigned int pde = first_entry / GEN6_PTES;
	unsigned int pte = first_entry % GEN6_PTES;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

	while (num_entries) {
		struct i915_page_table * const pt =
			i915_pt_entry(ppgtt->base.pd, pde++);
		/* Clamp to the remainder of this page table */
		const unsigned int count = min(num_entries, GEN6_PTES - pte);
		gen6_pte_t *vaddr;

		num_entries -= count;

		GEM_BUG_ON(count > atomic_read(&pt->used));
		if (!atomic_sub_return(count, &pt->used))
			ppgtt->scan_for_unused_pt = true;

		/*
		 * Note that the hw doesn't support removing PDE on the fly
		 * (they are cached inside the context with no means to
		 * invalidate the cache), so we can only reset the PTE
		 * entries back to scratch.
		 */

		vaddr = px_vaddr(pt);
		memset32(vaddr + pte, scratch_pte, count);

		/* Subsequent tables are cleared from their first entry */
		pte = 0;
	}
}
105 | ||
/*
 * Write one 4K PTE per page of @vma into the gen6 page tables.
 *
 * Walks the scatterlist of dma addresses backing @vma, advancing to the
 * next page table whenever GEN6_PTES entries have been consumed.  The
 * required page tables must already exist (see gen6_alloc_va_range).
 */
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct i915_vma *vma,
				      enum i915_cache_level cache_level,
				      u32 flags)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory * const pd = ppgtt->pd;
	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
	unsigned int act_pt = first_entry / GEN6_PTES;
	unsigned int act_pte = first_entry % GEN6_PTES;
	/* Cache/validity bits shared by every PTE written below */
	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter = sgt_dma(vma);
	gen6_pte_t *vaddr;

	GEM_BUG_ON(!pd->entry[act_pt]);

	vaddr = px_vaddr(i915_pt_entry(pd, act_pt));
	do {
		GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

		iter.dma += I915_GTT_PAGE_SIZE;
		if (iter.dma == iter.max) {
			/* Current dma segment exhausted; move to the next */
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg || sg_dma_len(iter.sg) == 0)
				break;

			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + sg_dma_len(iter.sg);
		}

		if (++act_pte == GEN6_PTES) {
			/* Crossed into the next page table */
			vaddr = px_vaddr(i915_pt_entry(pd, ++act_pt));
			act_pte = 0;
		}
	} while (1);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
145 | ||
/*
 * Rewrite the PDEs covering [start, end) into the GGTT-resident page
 * directory and flush the writes out to the hardware.
 */
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	unsigned int pde;

	/* Round out to whole PDEs; after this, `end` holds the length */
	start = round_down(start, SZ_64K);
	end = round_up(end, SZ_64K) - start;

	mutex_lock(&ppgtt->flush);

	gen6_for_each_pde(pt, pd, start, end, pde)
		gen6_write_pde(ppgtt, pde, pt);

	mb();
	/* Posting read of the last PDE written to push it past the GGTT */
	ioread32(ppgtt->pd_addr + pde - 1);
	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
	mb();

	mutex_unlock(&ppgtt->flush);
}
167 | ||
/*
 * Ensure page tables exist for [start, start + length).
 *
 * New page tables are taken from the preallocated @stash (this path never
 * allocates itself), initialised to point at scratch, and installed into
 * the page directory.  If any PDE changed and the PD is currently bound
 * in the GGTT, the updated PDEs are flushed to the hardware.
 */
static void gen6_alloc_va_range(struct i915_address_space *vm,
				struct i915_vm_pt_stash *stash,
				u64 start, u64 length)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	bool flush = false;
	u64 from = start;
	unsigned int pde;

	spin_lock(&pd->lock);
	gen6_for_each_pde(pt, pd, start, length, pde) {
		const unsigned int count = gen6_pte_count(start, length);

		if (!pt) {
			/* Drop the lock while preparing the new table */
			spin_unlock(&pd->lock);

			pt = stash->pt[0];
			__i915_gem_object_pin_pages(pt->base);

			/* Every PTE starts out pointing at scratch */
			fill32_px(pt, vm->scratch[0]->encode);

			spin_lock(&pd->lock);
			if (!pd->entry[pde]) {
				/* Slot still vacant: consume the stash entry */
				stash->pt[0] = pt->stash;
				atomic_set(&pt->used, 0);
				pd->entry[pde] = pt;
			} else {
				/* Raced with another thread; use the winner */
				pt = pd->entry[pde];
			}

			flush = true;
		}

		atomic_add(count, &pt->used);
	}
	spin_unlock(&pd->lock);

	if (flush && i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
			gen6_flush_pd(ppgtt, from, start);
	}
}
214 | ||
/*
 * Set up the two-level scratch: scratch[0] is the scratch page every
 * unused PTE points at; scratch[1] is a page table fully populated with
 * scratch PTEs, used for any PDE without a real page table.
 *
 * Returns 0 on success or a negative error code; on failure any
 * partially constructed scratch state is released.
 */
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
	struct i915_address_space * const vm = &ppgtt->base.vm;
	int ret;

	ret = setup_scratch_page(vm);
	if (ret)
		return ret;

	/* PTE encoding used for unused entries: read-only, uncached */
	vm->scratch[0]->encode =
		vm->pte_encode(px_dma(vm->scratch[0]),
			       I915_CACHE_NONE, PTE_READ_ONLY);

	vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
	if (IS_ERR(vm->scratch[1])) {
		ret = PTR_ERR(vm->scratch[1]);
		goto err_scratch0;
	}

	ret = map_pt_dma(vm, vm->scratch[1]);
	if (ret)
		goto err_scratch1;

	/* Point every PTE of the scratch page table at the scratch page */
	fill32_px(vm->scratch[1], vm->scratch[0]->encode);

	return 0;

err_scratch1:
	i915_gem_object_put(vm->scratch[1]);
err_scratch0:
	i915_gem_object_put(vm->scratch[0]);
	return ret;
}
248 | ||
249 | static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) | |
250 | { | |
251 | struct i915_page_directory * const pd = ppgtt->base.pd; | |
2c86e55d MA |
252 | struct i915_page_table *pt; |
253 | u32 pde; | |
254 | ||
255 | gen6_for_all_pdes(pt, pd, pde) | |
89351925 | 256 | if (pt) |
82adf901 | 257 | free_pt(&ppgtt->base.vm, pt); |
2c86e55d MA |
258 | } |
259 | ||
/*
 * Tear down a gen6 ppgtt: drop the PD vma, free all page tables, the
 * scratch state, both mutexes and finally the page directory itself.
 */
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

	/* Release the pseudo vma before freeing the tables it described */
	__i915_vma_put(ppgtt->vma);

	gen6_ppgtt_free_pd(ppgtt);
	free_scratch(vm);

	mutex_destroy(&ppgtt->flush);
	mutex_destroy(&ppgtt->pin_mutex);

	free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}
274 | ||
/*
 * The PD pseudo vma has no backing pages of its own; poison vma->pages
 * with an error pointer so accidental use is caught, and report success.
 */
static int pd_vma_set_pages(struct i915_vma *vma)
{
	vma->pages = ERR_PTR(-ENODEV);
	return 0;
}
280 | ||
/* Undo pd_vma_set_pages(): reset the poisoned vma->pages marker. */
static void pd_vma_clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	vma->pages = NULL;
}
287 | ||
/*
 * Bind the page directory into the GGTT: record its new location (both
 * the value the hw context needs and the CPU-visible PDE array inside
 * the GGTT aperture) and write out all PDEs.
 */
static void pd_vma_bind(struct i915_address_space *vm,
			struct i915_vm_pt_stash *stash,
			struct i915_vma *vma,
			enum i915_cache_level cache_level,
			u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	struct gen6_ppgtt *ppgtt = vma->private;
	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;

	/* NOTE(review): shifted encoding matches the hw PP_DIR_BASE layout — confirm against Bspec */
	ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
	/* CPU mapping of the PDEs through the GGTT (gsm) */
	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;

	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
}
303 | ||
12b07256 | 304 | static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) |
2c86e55d MA |
305 | { |
306 | struct gen6_ppgtt *ppgtt = vma->private; | |
307 | struct i915_page_directory * const pd = ppgtt->base.pd; | |
2c86e55d MA |
308 | struct i915_page_table *pt; |
309 | unsigned int pde; | |
310 | ||
311 | if (!ppgtt->scan_for_unused_pt) | |
312 | return; | |
313 | ||
314 | /* Free all no longer used page tables */ | |
315 | gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { | |
89351925 | 316 | if (!pt || atomic_read(&pt->used)) |
2c86e55d MA |
317 | continue; |
318 | ||
82adf901 | 319 | free_pt(&ppgtt->base.vm, pt); |
89351925 | 320 | pd->entry[pde] = NULL; |
2c86e55d MA |
321 | } |
322 | ||
323 | ppgtt->scan_for_unused_pt = false; | |
324 | } | |
325 | ||
/* vma ops for the page-directory pseudo vma built by pd_vma_create() */
static const struct i915_vma_ops pd_vma_ops = {
	.set_pages = pd_vma_set_pages,
	.clear_pages = pd_vma_clear_pages,
	.bind_vma = pd_vma_bind,
	.unbind_vma = pd_vma_unbind,
};
332 | ||
/*
 * Hand-build a vma describing the page directory itself so it can be
 * pinned into the GGTT like a normal object.  There is no backing GEM
 * object; binding is handled by pd_vma_ops.  Returns the new vma or an
 * ERR_PTR on allocation failure.
 */
static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
{
	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
	struct i915_vma *vma;

	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(size > ggtt->vm.total);

	vma = i915_vma_alloc();
	if (!vma)
		return ERR_PTR(-ENOMEM);

	/* No retirement callbacks needed for this internal vma */
	i915_active_init(&vma->active, NULL, NULL, 0);

	kref_init(&vma->ref);
	mutex_init(&vma->pages_mutex);
	vma->vm = i915_vm_get(&ggtt->vm);
	vma->ops = &pd_vma_ops;
	vma->private = ppgtt;

	vma->size = size;
	vma->fence_size = size;
	atomic_set(&vma->flags, I915_VMA_GGTT);
	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */

	INIT_LIST_HEAD(&vma->obj_link);
	INIT_LIST_HEAD(&vma->closed_link);

	return vma;
}
363 | ||
/*
 * Pin the ppgtt's page directory into the GGTT.
 *
 * Fast path: if pin_count is already non-zero, just bump it.  Otherwise
 * serialise on pin_mutex and perform the first real GGTT pin.  Returns 0
 * on success, -EINTR if interrupted waiting for the mutex, or the error
 * from i915_ggtt_pin().
 */
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
	int err;

	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));

	/*
	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
	 * which will be pinned into every active context.
	 * (When vma->pin_count becomes atomic, I expect we will naturally
	 * need a larger, unpacked, type and kill this redundancy.)
	 */
	if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
		return 0;

	if (mutex_lock_interruptible(&ppgtt->pin_mutex))
		return -EINTR;

	/*
	 * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	err = 0;
	/* Re-check under the mutex: another thread may have pinned already */
	if (!atomic_read(&ppgtt->pin_count))
		err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
	if (!err)
		atomic_inc(&ppgtt->pin_count);
	mutex_unlock(&ppgtt->pin_mutex);

	return err;
}
397 | ||
/* Drop one pin; release the PD's GGTT binding on the final unpin. */
void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
	if (atomic_dec_and_test(&ppgtt->pin_count))
		i915_vma_unpin(ppgtt->vma);
}
406 | ||
2c86e55d MA |
407 | struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) |
408 | { | |
409 | struct i915_ggtt * const ggtt = gt->ggtt; | |
410 | struct gen6_ppgtt *ppgtt; | |
411 | int err; | |
412 | ||
413 | ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); | |
414 | if (!ppgtt) | |
415 | return ERR_PTR(-ENOMEM); | |
416 | ||
417 | mutex_init(&ppgtt->flush); | |
418 | mutex_init(&ppgtt->pin_mutex); | |
419 | ||
a259cc14 | 420 | ppgtt_init(&ppgtt->base, gt, 0); |
cd0452aa | 421 | ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); |
2c86e55d MA |
422 | ppgtt->base.vm.top = 1; |
423 | ||
424 | ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; | |
425 | ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; | |
426 | ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; | |
427 | ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; | |
428 | ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; | |
429 | ||
89351925 | 430 | ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; |
2c86e55d MA |
431 | ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; |
432 | ||
82adf901 | 433 | ppgtt->base.pd = __alloc_pd(I915_PDES); |
2c86e55d MA |
434 | if (!ppgtt->base.pd) { |
435 | err = -ENOMEM; | |
436 | goto err_free; | |
437 | } | |
438 | ||
439 | err = gen6_ppgtt_init_scratch(ppgtt); | |
440 | if (err) | |
441 | goto err_pd; | |
442 | ||
443 | ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); | |
444 | if (IS_ERR(ppgtt->vma)) { | |
445 | err = PTR_ERR(ppgtt->vma); | |
446 | goto err_scratch; | |
447 | } | |
448 | ||
449 | return &ppgtt->base; | |
450 | ||
451 | err_scratch: | |
452 | free_scratch(&ppgtt->base.vm); | |
453 | err_pd: | |
82adf901 | 454 | free_pd(&ppgtt->base.vm, ppgtt->base.pd); |
2c86e55d MA |
455 | err_free: |
456 | mutex_destroy(&ppgtt->pin_mutex); | |
457 | kfree(ppgtt); | |
458 | return ERR_PTR(err); | |
459 | } |