/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/log2.h>
#include <linux/random.h>
#include <linux/seq_file.h>
#include <linux/stop_machine.h>

#include <asm/set_memory.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"

#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing pages
 * in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * are different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with a _view postfix to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view,
 * a globally const i915_ggtt_view_normal singleton instance exists. All old
 * core GEM API functions, the ones not taking the view parameter, operate on,
 * or with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
 * exists for the lifetime of the VMA.
 *
 * The core API is designed to have copy semantics, which means that the
 * passed-in struct i915_ggtt_view does not need to be persistent (left around
 * after calling the core API functions).
 *
 */

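/*
 * Illustrative sketch only (not part of the driver): following the steps
 * above, a hypothetical new view would extend the view type enum, e.g.
 *
 *	enum i915_ggtt_view_type {
 *		I915_GGTT_VIEW_NORMAL = 0,
 *		...
 *		I915_GGTT_VIEW_EXAMPLE,
 *	};
 *
 * add any metadata it needs to struct i915_ggtt_view, and build its
 * scatter-gather table in i915_get_vma_pages().
 */
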
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	/* Note that as an uncached mmio write, this should flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	gen6_ggtt_invalidate(dev_priv);
	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	intel_gtt_chipset_flush();
}

static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
{
	i915->ggtt.invalidate(i915);
}

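/*
 * Reconcile the requested PPGTT mode with what the platform and any active
 * vGPU support: returns 0 for no PPGTT, 1 for aliasing PPGTT, 2 for full
 * 32b PPGTT and 3 for full 48b PPGTT.
 */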
int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
				int enable_ppgtt)
{
	bool has_full_ppgtt;
	bool has_full_48bit_ppgtt;

	if (!dev_priv->info.has_aliasing_ppgtt)
		return 0;

	has_full_ppgtt = dev_priv->info.has_full_ppgtt;
	has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;

	if (intel_vgpu_active(dev_priv)) {
		/* GVT-g has no support for 32bit ppgtt */
		has_full_ppgtt = false;
		has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
	}

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
		return 3;

	/* Disable ppgtt on SNB if VT-d is on. */
	if (IS_GEN6(dev_priv) && intel_vtd_active()) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_GEN(dev_priv) >= 8 && i915_modparams.enable_execlists) {
		if (has_full_48bit_ppgtt)
			return 3;

		if (has_full_ppgtt)
			return 2;
	}

	return 1;
}

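/*
 * Bind a VMA into a ppgtt: on the first (local) bind allocate the page
 * tables backing the VMA's range, then write its PTEs.
 */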
static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	u32 pte_flags;
	int ret;

	if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
		ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
						 vma->size);
		if (ret)
			return ret;
	}

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}

static int ppgtt_set_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(vma->pages);

	vma->pages = vma->obj->mm.pages;

	vma->page_sizes = vma->obj->mm.page_sizes;

	return 0;
}

static void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level)
{
	gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
				  const enum i915_cache_level level)
{
	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE;
	else
		pde |= PPAT_UNCACHED;
	return pde;
}

#define gen8_pdpe_encode gen8_pde_encode
#define gen8_pml4e_encode gen8_pde_encode

static gen6_pte_t snb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_pte_t byt_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_pte_t iris_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

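/*
 * Allocate a page for page-table use: prefer the per-VM free list, then
 * (for WC-mapped page tables) the global WC stash, and only then fall back
 * to the page allocator.
 */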
static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct pagevec *pvec = &vm->free_pages;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	if (likely(pvec->nr))
		return pvec->pages[--pvec->nr];

	if (!vm->pt_kmap_wc)
		return alloc_page(gfp);

	/* A placeholder for a specific mutex to guard the WC stash */
	lockdep_assert_held(&vm->i915->drm.struct_mutex);

	/* Look in our global stash of WC pages... */
	pvec = &vm->i915->mm.wc_stash;
	if (likely(pvec->nr))
		return pvec->pages[--pvec->nr];

	/* Otherwise batch allocate pages to amortize cost of set_pages_wc. */
	do {
		struct page *page;

		page = alloc_page(gfp);
		if (unlikely(!page))
			break;

		pvec->pages[pvec->nr++] = page;
	} while (pagevec_space(pvec));

	if (unlikely(!pvec->nr))
		return NULL;

	set_pages_array_wc(pvec->pages, pvec->nr);

	return pvec->pages[--pvec->nr];
}

static void vm_free_pages_release(struct i915_address_space *vm,
				  bool immediate)
{
	struct pagevec *pvec = &vm->free_pages;

	GEM_BUG_ON(!pagevec_count(pvec));

	if (vm->pt_kmap_wc) {
		struct pagevec *stash = &vm->i915->mm.wc_stash;

		/* When we use WC, first fill up the global stash and then
		 * only if full immediately free the overflow.
		 */

		lockdep_assert_held(&vm->i915->drm.struct_mutex);
		if (pagevec_space(stash)) {
			do {
				stash->pages[stash->nr++] =
					pvec->pages[--pvec->nr];
				if (!pvec->nr)
					return;
			} while (pagevec_space(stash));

			/* As we have made some room in the VM's free_pages,
			 * we can wait for it to fill again. Unless we are
			 * inside i915_address_space_fini() and must
			 * immediately release the pages!
			 */
			if (!immediate)
				return;
		}

		set_pages_array_wb(pvec->pages, pvec->nr);
	}

	__pagevec_release(pvec);
}

static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
	/*
	 * On !llc, we need to change the pages back to WB. We only do so
	 * in bulk, so we rarely need to change the page attributes here,
	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
	 * To make detection of the possible sleep more likely, use an
	 * unconditional might_sleep() for everybody.
	 */
	might_sleep();
	if (!pagevec_add(&vm->free_pages, page))
		vm_free_pages_release(vm, false);
}

static int __setup_page_dma(struct i915_address_space *vm,
			    struct i915_page_dma *p,
			    gfp_t gfp)
{
	p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
	if (unlikely(!p->page))
		return -ENOMEM;

	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
		vm_free_page(vm, p->page);
		return -ENOMEM;
	}

	return 0;
}

static int setup_page_dma(struct i915_address_space *vm,
			  struct i915_page_dma *p)
{
	return __setup_page_dma(vm, p, I915_GFP_DMA);
}

static void cleanup_page_dma(struct i915_address_space *vm,
			     struct i915_page_dma *p)
{
	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	vm_free_page(vm, p->page);
}

#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)

#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v))
#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v))

static void fill_page_dma(struct i915_address_space *vm,
			  struct i915_page_dma *p,
			  const u64 val)
{
	u64 * const vaddr = kmap_atomic(p->page);

	memset64(vaddr, val, PAGE_SIZE / sizeof(val));

	kunmap_atomic(vaddr);
}

static void fill_page_dma_32(struct i915_address_space *vm,
			     struct i915_page_dma *p,
			     const u32 v)
{
	fill_page_dma(vm, p, (u64)v << 32 | v);
}

static int
setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct page *page = NULL;
	dma_addr_t addr;
	int order;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert().
	 *
	 * TODO: we should really consider write-protecting the scratch-page and
	 * sharing between ppgtt
	 */
	if (i915_vm_is_48bit(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
		order = get_order(I915_GTT_PAGE_SIZE_64K);
		page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order);
		if (page) {
			addr = dma_map_page(vm->dma, page, 0,
					    I915_GTT_PAGE_SIZE_64K,
					    PCI_DMA_BIDIRECTIONAL);
			if (unlikely(dma_mapping_error(vm->dma, addr))) {
				__free_pages(page, order);
				page = NULL;
			}

			if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) {
				dma_unmap_page(vm->dma, addr,
					       I915_GTT_PAGE_SIZE_64K,
					       PCI_DMA_BIDIRECTIONAL);
				__free_pages(page, order);
				page = NULL;
			}
		}
	}

	if (!page) {
		order = 0;
		page = alloc_page(gfp | __GFP_ZERO);
		if (unlikely(!page))
			return -ENOMEM;

		addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE,
				    PCI_DMA_BIDIRECTIONAL);
		if (unlikely(dma_mapping_error(vm->dma, addr))) {
			__free_page(page);
			return -ENOMEM;
		}
	}

	vm->scratch_page.page = page;
	vm->scratch_page.daddr = addr;
	vm->scratch_page.order = order;

	return 0;
}

static void cleanup_scratch_page(struct i915_address_space *vm)
{
	struct i915_page_dma *p = &vm->scratch_page;

	dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
		       PCI_DMA_BIDIRECTIONAL);
	__free_pages(p->page, p->order);
}

static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
{
	struct i915_page_table *pt;

	pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
	if (unlikely(!pt))
		return ERR_PTR(-ENOMEM);

	if (unlikely(setup_px(vm, pt))) {
		kfree(pt);
		return ERR_PTR(-ENOMEM);
	}

	pt->used_ptes = 0;
	return pt;
}

static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
{
	cleanup_px(vm, pt);
	kfree(pt);
}

static void gen8_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	fill_px(vm, pt,
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
}

static void gen6_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	fill32_px(vm, pt,
		  vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
}

static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
	struct i915_page_directory *pd;

	pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	if (unlikely(setup_px(vm, pd))) {
		kfree(pd);
		return ERR_PTR(-ENOMEM);
	}

	pd->used_pdes = 0;
	return pd;
}

static void free_pd(struct i915_address_space *vm,
		    struct i915_page_directory *pd)
{
	cleanup_px(vm, pd);
	kfree(pd);
}

static void gen8_initialize_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd)
{
	unsigned int i;

	fill_px(vm, pd,
		gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
	for (i = 0; i < I915_PDES; i++)
		pd->page_table[i] = vm->scratch_pt;
}

static int __pdp_init(struct i915_address_space *vm,
		      struct i915_page_directory_pointer *pdp)
{
	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
	unsigned int i;

	pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
					    GFP_KERNEL | __GFP_NOWARN);
	if (unlikely(!pdp->page_directory))
		return -ENOMEM;

	for (i = 0; i < pdpes; i++)
		pdp->page_directory[i] = vm->scratch_pd;

	return 0;
}

static void __pdp_fini(struct i915_page_directory_pointer *pdp)
{
	kfree(pdp->page_directory);
	pdp->page_directory = NULL;
}

static inline bool use_4lvl(const struct i915_address_space *vm)
{
	return i915_vm_is_48bit(vm);
}

static struct i915_page_directory_pointer *
alloc_pdp(struct i915_address_space *vm)
{
	struct i915_page_directory_pointer *pdp;
	int ret = -ENOMEM;

	WARN_ON(!use_4lvl(vm));

	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
	if (!pdp)
		return ERR_PTR(-ENOMEM);

	ret = __pdp_init(vm, pdp);
	if (ret)
		goto fail_bitmap;

	ret = setup_px(vm, pdp);
	if (ret)
		goto fail_page_m;

	return pdp;

fail_page_m:
	__pdp_fini(pdp);
fail_bitmap:
	kfree(pdp);

	return ERR_PTR(ret);
}

static void free_pdp(struct i915_address_space *vm,
		     struct i915_page_directory_pointer *pdp)
{
	__pdp_fini(pdp);

	if (!use_4lvl(vm))
		return;

	cleanup_px(vm, pdp);
	kfree(pdp);
}

static void gen8_initialize_pdp(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp)
{
	gen8_ppgtt_pdpe_t scratch_pdpe;

	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);

	fill_px(vm, pdp, scratch_pdpe);
}

static void gen8_initialize_pml4(struct i915_address_space *vm,
				 struct i915_pml4 *pml4)
{
	unsigned int i;

	fill_px(vm, pml4,
		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
		pml4->pdps[i] = vm->scratch_pdp;
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct drm_i915_gem_request *req,
			  unsigned entry,
			  dma_addr_t addr)
{
	struct intel_engine_cs *engine = req->engine;
	u32 *cs;

	BUG_ON(entry >= 4);

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry));
	*cs++ = upper_32_bits(addr);
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry));
	*cs++ = lower_32_bits(addr);
	intel_ring_advance(req, cs);

	return 0;
}

static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt,
			       struct drm_i915_gem_request *req)
{
	int i, ret;

	for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);

		ret = gen8_write_pdp(req, i, pd_daddr);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt,
			       struct drm_i915_gem_request *req)
{
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
}

/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
 * the page table structures, we mark them dirty so that
 * context switching/execlist queuing code takes extra steps
 * to ensure that tlbs are flushed.
 */
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
{
	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask;
}

/* Removes entries from a single page table, releasing it if it's empty.
 * Caller can use the return value to update higher-level entries.
 */
static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
				struct i915_page_table *pt,
				u64 start, u64 length)
{
	unsigned int num_entries = gen8_pte_count(start, length);
	unsigned int pte = gen8_pte_index(start);
	unsigned int pte_end = pte + num_entries;
	const gen8_pte_t scratch_pte =
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
	gen8_pte_t *vaddr;

	GEM_BUG_ON(num_entries > pt->used_ptes);

	pt->used_ptes -= num_entries;
	if (!pt->used_ptes)
		return true;

	vaddr = kmap_atomic_px(pt);
	while (pte < pte_end)
		vaddr[pte++] = scratch_pte;
	kunmap_atomic(vaddr);

	return false;
}

static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
			       struct i915_page_directory *pd,
			       struct i915_page_table *pt,
			       unsigned int pde)
{
	gen8_pde_t *vaddr;

	pd->page_table[pde] = pt;

	vaddr = kmap_atomic_px(pd);
	vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
	kunmap_atomic(vaddr);
}

static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
				struct i915_page_directory *pd,
				u64 start, u64 length)
{
	struct i915_page_table *pt;
	u32 pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		GEM_BUG_ON(pt == vm->scratch_pt);

		if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
			continue;

		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
		GEM_BUG_ON(!pd->used_pdes);
		pd->used_pdes--;

		free_pt(vm, pt);
	}

	return !pd->used_pdes;
}

static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp,
				struct i915_page_directory *pd,
				unsigned int pdpe)
{
	gen8_ppgtt_pdpe_t *vaddr;

	pdp->page_directory[pdpe] = pd;
	if (!use_4lvl(vm))
		return;

	vaddr = kmap_atomic_px(pdp);
	vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
	kunmap_atomic(vaddr);
}

/* Removes entries from a single page dir pointer, releasing it if it's empty.
 * Caller can use the return value to update higher-level entries
 */
static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
				 struct i915_page_directory_pointer *pdp,
				 u64 start, u64 length)
{
	struct i915_page_directory *pd;
	unsigned int pdpe;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		GEM_BUG_ON(pd == vm->scratch_pd);

		if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
			continue;

		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
		GEM_BUG_ON(!pdp->used_pdpes);
		pdp->used_pdpes--;

		free_pd(vm, pd);
	}

	return !pdp->used_pdpes;
}

static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
}

static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
				 struct i915_page_directory_pointer *pdp,
				 unsigned int pml4e)
{
	gen8_ppgtt_pml4e_t *vaddr;

	pml4->pdps[pml4e] = pdp;

	vaddr = kmap_atomic_px(pml4);
	vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
	kunmap_atomic(vaddr);
}

/* Removes entries from a single pml4.
 * This is the top-level structure in 4-level page tables used on gen8+.
 * Empty entries are always scratch pml4e.
 */
static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_pml4 *pml4 = &ppgtt->pml4;
	struct i915_page_directory_pointer *pdp;
	unsigned int pml4e;

	GEM_BUG_ON(!use_4lvl(vm));

	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
		GEM_BUG_ON(pdp == vm->scratch_pdp);

		if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
			continue;

		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);

		free_pdp(vm, pdp);
	}
}

static inline struct sgt_dma {
	struct scatterlist *sg;
	dma_addr_t dma, max;
} sgt_dma(struct i915_vma *vma) {
	struct scatterlist *sg = vma->pages->sgl;
	dma_addr_t addr = sg_dma_address(sg);
	return (struct sgt_dma) { sg, addr, addr + sg->length };
}

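/*
 * Cursor of page-table indices (pml4e/pdpe/pde/pte) corresponding to a
 * 48b GTT offset; advanced as PTEs are written.
 */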
struct gen8_insert_pte {
	u16 pml4e;
	u16 pdpe;
	u16 pde;
	u16 pte;
};

static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
{
	return (struct gen8_insert_pte) {
		gen8_pml4e_index(start),
		gen8_pdpe_index(start),
		gen8_pde_index(start),
		gen8_pte_index(start),
	};
}

static __always_inline bool
gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
			      struct i915_page_directory_pointer *pdp,
			      struct sgt_dma *iter,
			      struct gen8_insert_pte *idx,
			      enum i915_cache_level cache_level)
{
	struct i915_page_directory *pd;
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
	gen8_pte_t *vaddr;
	bool ret;

	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
	pd = pdp->page_directory[idx->pdpe];
	vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
	do {
		vaddr[idx->pte] = pte_encode | iter->dma;

		iter->dma += PAGE_SIZE;
		if (iter->dma >= iter->max) {
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg) {
				ret = false;
				break;
			}

			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + iter->sg->length;
		}

		if (++idx->pte == GEN8_PTES) {
			idx->pte = 0;

			if (++idx->pde == I915_PDES) {
				idx->pde = 0;

				/* Limited by sg length for 3lvl */
				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
					idx->pdpe = 0;
					ret = true;
					break;
				}

				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
				pd = pdp->page_directory[idx->pdpe];
			}

			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
		}
	} while (1);
	kunmap_atomic(vaddr);

	return ret;
}

static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
				   struct i915_vma *vma,
				   enum i915_cache_level cache_level,
				   u32 unused)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = sgt_dma(vma);
	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);

	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
				      cache_level);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

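/*
 * Write out the sg list using 2M PDE entries or the 64K PTE encoding
 * whenever the dma addresses, remaining length and GTT offset are suitably
 * sized and aligned, falling back to 4K PTEs otherwise.
 */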
static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
					   struct i915_page_directory_pointer **pdps,
					   struct sgt_dma *iter,
					   enum i915_cache_level cache_level)
{
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
	u64 start = vma->node.start;
	dma_addr_t rem = iter->sg->length;

	do {
		struct gen8_insert_pte idx = gen8_insert_pte(start);
		struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
		struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
		unsigned int page_size;
		bool maybe_64K = false;
		gen8_pte_t encode = pte_encode;
		gen8_pte_t *vaddr;
		u16 index, max;

		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
			index = idx.pde;
			max = I915_PDES;
			page_size = I915_GTT_PAGE_SIZE_2M;

			encode |= GEN8_PDE_PS_2M;

			vaddr = kmap_atomic_px(pd);
		} else {
			struct i915_page_table *pt = pd->page_table[idx.pde];

			index = idx.pte;
			max = GEN8_PTES;
			page_size = I915_GTT_PAGE_SIZE;

			if (!index &&
			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
			     rem >= (max - index) << PAGE_SHIFT))
				maybe_64K = true;

			vaddr = kmap_atomic_px(pt);
		}

		do {
			GEM_BUG_ON(iter->sg->length < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = iter->sg->length;
				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (maybe_64K && index < max &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
				       rem >= (max - index) << PAGE_SHIFT)))
					maybe_64K = false;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		kunmap_atomic(vaddr);

		/*
		 * Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled whole page-table with 64K entries, or filled part of
		 * it and have reached the end of the sg table and we have
		 * enough padding.
		 */
		if (maybe_64K &&
		    (index == max ||
		     (i915_vm_has_scratch_64K(vma->vm) &&
		      !iter->sg && IS_ALIGNED(vma->node.start +
					      vma->node.size,
					      I915_GTT_PAGE_SIZE_2M)))) {
			vaddr = kmap_atomic_px(pd);
			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
			kunmap_atomic(vaddr);
			page_size = I915_GTT_PAGE_SIZE_64K;
		}

		vma->page_sizes.gtt |= page_size;
	} while (iter->sg);
}

static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
				   struct i915_vma *vma,
				   enum i915_cache_level cache_level,
				   u32 unused)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = sgt_dma(vma);
	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;

	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
		gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level);
	} else {
		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);

		while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
						     &iter, &idx, cache_level))
			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);

		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
	}
}

static void gen8_free_page_tables(struct i915_address_space *vm,
				  struct i915_page_directory *pd)
{
	int i;

	if (!px_page(pd))
		return;

	for (i = 0; i < I915_PDES; i++) {
		if (pd->page_table[i] != vm->scratch_pt)
			free_pt(vm, pd->page_table[i]);
	}
}

64c050db 1201 int ret;
8776f02b 1202
8448661d 1203 ret = setup_scratch_page(vm, I915_GFP_DMA);
8bcdd0f7
CW
1204 if (ret)
1205 return ret;
8776f02b 1206
8448661d 1207 vm->scratch_pt = alloc_pt(vm);
8776f02b 1208 if (IS_ERR(vm->scratch_pt)) {
64c050db
MA
1209 ret = PTR_ERR(vm->scratch_pt);
1210 goto free_scratch_page;
8776f02b
MK
1211 }
1212
8448661d 1213 vm->scratch_pd = alloc_pd(vm);
8776f02b 1214 if (IS_ERR(vm->scratch_pd)) {
64c050db
MA
1215 ret = PTR_ERR(vm->scratch_pd);
1216 goto free_pt;
8776f02b
MK
1217 }
1218
1e6437b0 1219 if (use_4lvl(vm)) {
8448661d 1220 vm->scratch_pdp = alloc_pdp(vm);
69ab76fd 1221 if (IS_ERR(vm->scratch_pdp)) {
64c050db
MA
1222 ret = PTR_ERR(vm->scratch_pdp);
1223 goto free_pd;
69ab76fd
MT
1224 }
1225 }
1226
8776f02b
MK
1227 gen8_initialize_pt(vm, vm->scratch_pt);
1228 gen8_initialize_pd(vm, vm->scratch_pd);
1e6437b0 1229 if (use_4lvl(vm))
69ab76fd 1230 gen8_initialize_pdp(vm, vm->scratch_pdp);
8776f02b
MK
1231
1232 return 0;
64c050db
MA
1233
1234free_pd:
8448661d 1235 free_pd(vm, vm->scratch_pd);
64c050db 1236free_pt:
8448661d 1237 free_pt(vm, vm->scratch_pt);
64c050db 1238free_scratch_page:
8448661d 1239 cleanup_scratch_page(vm);
64c050db
MA
1240
1241 return ret;
8776f02b
MK
1242}
1243
650da34c
ZL
1244static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
1245{
1e6437b0
MK
1246 struct i915_address_space *vm = &ppgtt->base;
1247 struct drm_i915_private *dev_priv = vm->i915;
650da34c 1248 enum vgt_g2v_type msg;
650da34c
ZL
1249 int i;
1250
1e6437b0
MK
1251 if (use_4lvl(vm)) {
1252 const u64 daddr = px_dma(&ppgtt->pml4);
650da34c 1253
ab75bb5d
VS
1254 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1255 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
650da34c
ZL
1256
1257 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1258 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1259 } else {
e7167769 1260 for (i = 0; i < GEN8_3LVL_PDPES; i++) {
1e6437b0 1261 const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
650da34c 1262
ab75bb5d
VS
1263 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1264 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
650da34c
ZL
1265 }
1266
1267 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1268 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1269 }
1270
1271 I915_WRITE(vgtif_reg(g2v_notify), msg);
1272
1273 return 0;
1274}
1275
8776f02b
MK
1276static void gen8_free_scratch(struct i915_address_space *vm)
1277{
1e6437b0 1278 if (use_4lvl(vm))
8448661d
CW
1279 free_pdp(vm, vm->scratch_pdp);
1280 free_pd(vm, vm->scratch_pd);
1281 free_pt(vm, vm->scratch_pt);
1282 cleanup_scratch_page(vm);
8776f02b
MK
1283}
1284
8448661d 1285static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
762d9936 1286 struct i915_page_directory_pointer *pdp)
b45a6715 1287{
3e490042 1288 const unsigned int pdpes = i915_pdpes_per_pdp(vm);
b45a6715
BW
1289 int i;
1290
3e490042 1291 for (i = 0; i < pdpes; i++) {
fe52e37f 1292 if (pdp->page_directory[i] == vm->scratch_pd)
06fda602
BW
1293 continue;
1294
8448661d
CW
1295 gen8_free_page_tables(vm, pdp->page_directory[i]);
1296 free_pd(vm, pdp->page_directory[i]);
7ad47cf2 1297 }
69876bed 1298
8448661d 1299 free_pdp(vm, pdp);
762d9936
MT
1300}
1301
1302static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1303{
1304 int i;
1305
c5d092a4
CW
1306 for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1307 if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp)
762d9936
MT
1308 continue;
1309
8448661d 1310 gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
762d9936
MT
1311 }
1312
8448661d 1313 cleanup_px(&ppgtt->base, &ppgtt->pml4);
762d9936
MT
1314}
1315
1316static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1317{
49d73912 1318 struct drm_i915_private *dev_priv = vm->i915;
e5716f55 1319 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
762d9936 1320
275a991c 1321 if (intel_vgpu_active(dev_priv))
650da34c
ZL
1322 gen8_ppgtt_notify_vgt(ppgtt, false);
1323
1e6437b0 1324 if (use_4lvl(vm))
762d9936 1325 gen8_ppgtt_cleanup_4lvl(ppgtt);
1e6437b0
MK
1326 else
1327 gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
d4ec9da0 1328
8776f02b 1329 gen8_free_scratch(vm);
b45a6715
BW
1330}
1331
fe52e37f
CW
1332static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
1333 struct i915_page_directory *pd,
1334 u64 start, u64 length)
bf2b4ed2 1335{
d7b2633d 1336 struct i915_page_table *pt;
dd19674b 1337 u64 from = start;
fe52e37f 1338 unsigned int pde;
bf2b4ed2 1339
e8ebd8e2 1340 gen8_for_each_pde(pt, pd, start, length, pde) {
14826673
CW
1341 int count = gen8_pte_count(start, length);
1342
fe52e37f 1343 if (pt == vm->scratch_pt) {
dd19674b
CW
1344 pt = alloc_pt(vm);
1345 if (IS_ERR(pt))
1346 goto unwind;
5441f0cb 1347
22a8a4fc 1348 if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
14826673 1349 gen8_initialize_pt(vm, pt);
fe52e37f
CW
1350
1351 gen8_ppgtt_set_pde(vm, pd, pt, pde);
1352 pd->used_pdes++;
bf75d59e 1353 GEM_BUG_ON(pd->used_pdes > I915_PDES);
dd19674b 1354 }
fe52e37f 1355
14826673 1356 pt->used_ptes += count;
7ad47cf2 1357 }
bf2b4ed2 1358 return 0;
7ad47cf2 1359
dd19674b
CW
1360unwind:
1361 gen8_ppgtt_clear_pd(vm, pd, from, start - from);
d7b3de91 1362 return -ENOMEM;
bf2b4ed2
BW
1363}
1364
c5d092a4
CW
1365static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
1366 struct i915_page_directory_pointer *pdp,
1367 u64 start, u64 length)
bf2b4ed2 1368{
5441f0cb 1369 struct i915_page_directory *pd;
e2b763ca
CW
1370 u64 from = start;
1371 unsigned int pdpe;
bf2b4ed2
BW
1372 int ret;
1373
e8ebd8e2 1374 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
e2b763ca
CW
1375 if (pd == vm->scratch_pd) {
1376 pd = alloc_pd(vm);
1377 if (IS_ERR(pd))
1378 goto unwind;
5441f0cb 1379
e2b763ca 1380 gen8_initialize_pd(vm, pd);
fe52e37f 1381 gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
e2b763ca 1382 pdp->used_pdpes++;
3e490042 1383 GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
75afcf72
CW
1384
1385 mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
e2b763ca
CW
1386 }
1387
1388 ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
bf75d59e
CW
1389 if (unlikely(ret))
1390 goto unwind_pd;
fe52e37f 1391 }
33c8819f 1392
d7b3de91 1393 return 0;
bf2b4ed2 1394
bf75d59e
CW
1395unwind_pd:
1396 if (!pd->used_pdes) {
1397 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1398 GEM_BUG_ON(!pdp->used_pdpes);
1399 pdp->used_pdpes--;
1400 free_pd(vm, pd);
1401 }
e2b763ca
CW
1402unwind:
1403 gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
1404 return -ENOMEM;
bf2b4ed2
BW
1405}
1406
c5d092a4
CW
1407static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
1408 u64 start, u64 length)
762d9936 1409{
c5d092a4
CW
1410 return gen8_ppgtt_alloc_pdp(vm,
1411 &i915_vm_to_ppgtt(vm)->pdp, start, length);
1412}
762d9936 1413
c5d092a4
CW
1414static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
1415 u64 start, u64 length)
1416{
1417 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1418 struct i915_pml4 *pml4 = &ppgtt->pml4;
1419 struct i915_page_directory_pointer *pdp;
1420 u64 from = start;
1421 u32 pml4e;
1422 int ret;
762d9936 1423
e8ebd8e2 1424 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
c5d092a4
CW
1425 if (pml4->pdps[pml4e] == vm->scratch_pdp) {
1426 pdp = alloc_pdp(vm);
1427 if (IS_ERR(pdp))
1428 goto unwind;
762d9936 1429
c5d092a4
CW
1430 gen8_initialize_pdp(vm, pdp);
1431 gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1432 }
762d9936 1433
c5d092a4 1434 ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
bf75d59e
CW
1435 if (unlikely(ret))
1436 goto unwind_pdp;
762d9936
MT
1437 }
1438
762d9936
MT
1439 return 0;
1440
bf75d59e
CW
1441unwind_pdp:
1442 if (!pdp->used_pdpes) {
1443 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1444 free_pdp(vm, pdp);
1445 }
c5d092a4
CW
1446unwind:
1447 gen8_ppgtt_clear_4lvl(vm, from, start - from);
1448 return -ENOMEM;
762d9936
MT
1449}
1450
8448661d
CW
1451static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
1452 struct i915_page_directory_pointer *pdp,
75c7b0b8 1453 u64 start, u64 length,
ea91e401
MT
1454 gen8_pte_t scratch_pte,
1455 struct seq_file *m)
1456{
3e490042 1457 struct i915_address_space *vm = &ppgtt->base;
ea91e401 1458 struct i915_page_directory *pd;
75c7b0b8 1459 u32 pdpe;
ea91e401 1460
e8ebd8e2 1461 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
ea91e401 1462 struct i915_page_table *pt;
75c7b0b8
CW
1463 u64 pd_len = length;
1464 u64 pd_start = start;
1465 u32 pde;
ea91e401 1466
e2b763ca 1467 if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
ea91e401
MT
1468 continue;
1469
1470 seq_printf(m, "\tPDPE #%d\n", pdpe);
e8ebd8e2 1471 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
75c7b0b8 1472 u32 pte;
ea91e401
MT
1473 gen8_pte_t *pt_vaddr;
1474
fe52e37f 1475 if (pd->page_table[pde] == ppgtt->base.scratch_pt)
ea91e401
MT
1476 continue;
1477
9231da70 1478 pt_vaddr = kmap_atomic_px(pt);
ea91e401 1479 for (pte = 0; pte < GEN8_PTES; pte += 4) {
75c7b0b8
CW
1480 u64 va = (pdpe << GEN8_PDPE_SHIFT |
1481 pde << GEN8_PDE_SHIFT |
1482 pte << GEN8_PTE_SHIFT);
ea91e401
MT
1483 int i;
1484 bool found = false;
1485
1486 for (i = 0; i < 4; i++)
1487 if (pt_vaddr[pte + i] != scratch_pte)
1488 found = true;
1489 if (!found)
1490 continue;
1491
1492 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1493 for (i = 0; i < 4; i++) {
1494 if (pt_vaddr[pte + i] != scratch_pte)
1495 seq_printf(m, " %llx", pt_vaddr[pte + i]);
1496 else
1497 seq_puts(m, " SCRATCH ");
1498 }
1499 seq_puts(m, "\n");
1500 }
ea91e401
MT
1501 kunmap_atomic(pt_vaddr);
1502 }
1503 }
1504}
1505
1506static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1507{
1508 struct i915_address_space *vm = &ppgtt->base;
894ccebe
CW
1509 const gen8_pte_t scratch_pte =
1510 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
381b943b 1511 u64 start = 0, length = ppgtt->base.total;
ea91e401 1512
1e6437b0 1513 if (use_4lvl(vm)) {
75c7b0b8 1514 u64 pml4e;
ea91e401
MT
1515 struct i915_pml4 *pml4 = &ppgtt->pml4;
1516 struct i915_page_directory_pointer *pdp;
1517
e8ebd8e2 1518 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
c5d092a4 1519 if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp)
ea91e401
MT
1520 continue;
1521
1522 seq_printf(m, " PML4E #%llu\n", pml4e);
8448661d 1523 gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
ea91e401 1524 }
1e6437b0
MK
1525 } else {
1526 gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
ea91e401
MT
1527 }
1528}
1529
static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
	struct i915_page_directory *pd;
	u64 start = 0, length = ppgtt->base.total;
	u64 from = start;
	unsigned int pdpe;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		pd = alloc_pd(vm);
		if (IS_ERR(pd))
			goto unwind;

		gen8_initialize_pd(vm, pd);
		gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
		pdp->used_pdpes++;
	}

	pdp->used_pdpes++; /* never remove */
	return 0;

unwind:
	start -= from;
	gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
		free_pd(vm, pd);
	}
	pdp->used_pdpes = 0;
	return -ENOMEM;
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
 *
 */
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct drm_i915_private *dev_priv = vm->i915;
	int ret;

	ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ?
		1ULL << 48 :
		1ULL << 32;

	/* There are only a few exceptions for gen >= 6: chv and bxt.
	 * And we are not sure about the latter so play safe for now.
	 */
	if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
		ppgtt->base.pt_kmap_wc = true;

	ret = gen8_init_scratch(&ppgtt->base);
	if (ret) {
		ppgtt->base.total = 0;
		return ret;
	}

	if (use_4lvl(vm)) {
		ret = setup_px(&ppgtt->base, &ppgtt->pml4);
		if (ret)
			goto free_scratch;

		gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);

		ppgtt->switch_mm = gen8_mm_switch_4lvl;
		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
	} else {
		ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
		if (ret)
			goto free_scratch;

		if (intel_vgpu_active(dev_priv)) {
			ret = gen8_preallocate_top_level_pdp(ppgtt);
			if (ret) {
				__pdp_fini(&ppgtt->pdp);
				goto free_scratch;
			}
		}

		ppgtt->switch_mm = gen8_mm_switch_3lvl;
		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl;
		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
		ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
	}

	if (intel_vgpu_active(dev_priv))
		gen8_ppgtt_notify_vgt(ppgtt, true);

	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
	ppgtt->base.bind_vma = ppgtt_bind_vma;
	ppgtt->base.set_pages = ppgtt_set_pages;
	ppgtt->base.clear_pages = clear_pages;
	ppgtt->debug_dump = gen8_dump_ppgtt;

	return 0;

free_scratch:
	gen8_free_scratch(&ppgtt->base);
	return ret;
}

static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct i915_page_table *unused;
	gen6_pte_t scratch_pte;
	u32 pd_entry, pte, pde;
	u32 start = 0, length = ppgtt->base.total;

	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
				     I915_CACHE_LLC, 0);

	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
		u32 expected;
		gen6_pte_t *pt_vaddr;
		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
		pd_entry = readl(ppgtt->pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]);

		for (pte = 0; pte < GEN6_PTES; pte += 4) {
			unsigned long va =
				(pde * PAGE_SIZE * GEN6_PTES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;
			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_puts(m, " SCRATCH ");
			}
			seq_puts(m, "\n");
		}
		kunmap_atomic(pt_vaddr);
	}
}

/* Write pde (index) from the page directory @pd to the page table @pt */
static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
				  const unsigned int pde,
				  const struct i915_page_table *pt)
{
	/* Caller needs to make sure the write completes if necessary */
	writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
		       ppgtt->pd_addr + pde);
}

/* Write all the page tables found in the ppgtt structure to incrementing page
 * directories. */
static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
				  u32 start, u32 length)
{
	struct i915_page_table *pt;
	unsigned int pde;

	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
		gen6_write_pde(ppgtt, pde, pt);

	mark_tlbs_dirty(ppgtt);
	wmb();
}

static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
	GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
	return ppgtt->pd.base.ggtt_offset << 10;
}

90252e5c 1723{
4a570db5 1724 struct intel_engine_cs *engine = req->engine;
73dec95e 1725 u32 *cs;
90252e5c 1726
90252e5c 1727 /* NB: TLBs must be flushed and invalidated before a switch */
73dec95e
TU
1728 cs = intel_ring_begin(req, 6);
1729 if (IS_ERR(cs))
1730 return PTR_ERR(cs);
90252e5c 1731
73dec95e
TU
1732 *cs++ = MI_LOAD_REGISTER_IMM(2);
1733 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1734 *cs++ = PP_DIR_DCLV_2G;
1735 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1736 *cs++ = get_pd_offset(ppgtt);
1737 *cs++ = MI_NOOP;
1738 intel_ring_advance(req, cs);
90252e5c
BW
1739
1740 return 0;
1741}
1742
48a10389 1743static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
e85b26dc 1744 struct drm_i915_gem_request *req)
48a10389 1745{
4a570db5 1746 struct intel_engine_cs *engine = req->engine;
73dec95e 1747 u32 *cs;
48a10389 1748
48a10389 1749 /* NB: TLBs must be flushed and invalidated before a switch */
73dec95e
TU
1750 cs = intel_ring_begin(req, 6);
1751 if (IS_ERR(cs))
1752 return PTR_ERR(cs);
1753
1754 *cs++ = MI_LOAD_REGISTER_IMM(2);
1755 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1756 *cs++ = PP_DIR_DCLV_2G;
1757 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1758 *cs++ = get_pd_offset(ppgtt);
1759 *cs++ = MI_NOOP;
1760 intel_ring_advance(req, cs);
48a10389
BW
1761
1762 return 0;
1763}
1764
eeb9488e 1765static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
e85b26dc 1766 struct drm_i915_gem_request *req)
eeb9488e 1767{
4a570db5 1768 struct intel_engine_cs *engine = req->engine;
8eb95204 1769 struct drm_i915_private *dev_priv = req->i915;
48a10389 1770
e2f80391
TU
1771 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1772 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
eeb9488e
BW
1773 return 0;
1774}
1775
c6be607a 1776static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
eeb9488e 1777{
e2f80391 1778 struct intel_engine_cs *engine;
3b3f1650 1779 enum intel_engine_id id;
3e302542 1780
3b3f1650 1781 for_each_engine(engine, dev_priv, id) {
c6be607a
TU
1782 u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
1783 GEN8_GFX_PPGTT_48B : 0;
e2f80391 1784 I915_WRITE(RING_MODE_GEN7(engine),
2dba3239 1785 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
eeb9488e 1786 }
eeb9488e 1787}
6197349b 1788
c6be607a 1789static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
3e302542 1790{
e2f80391 1791 struct intel_engine_cs *engine;
75c7b0b8 1792 u32 ecochk, ecobits;
3b3f1650 1793 enum intel_engine_id id;
6197349b 1794
b4a74e3a
BW
1795 ecobits = I915_READ(GAC_ECO_BITS);
1796 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
a65c2fcd 1797
b4a74e3a 1798 ecochk = I915_READ(GAM_ECOCHK);
772c2a51 1799 if (IS_HASWELL(dev_priv)) {
b4a74e3a
BW
1800 ecochk |= ECOCHK_PPGTT_WB_HSW;
1801 } else {
1802 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1803 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1804 }
1805 I915_WRITE(GAM_ECOCHK, ecochk);
a65c2fcd 1806
3b3f1650 1807 for_each_engine(engine, dev_priv, id) {
6197349b 1808 /* GFX_MODE is per-ring on gen7+ */
e2f80391 1809 I915_WRITE(RING_MODE_GEN7(engine),
b4a74e3a 1810 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
6197349b 1811 }
b4a74e3a 1812}
6197349b 1813
c6be607a 1814static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
b4a74e3a 1815{
75c7b0b8 1816 u32 ecochk, gab_ctl, ecobits;
a65c2fcd 1817
b4a74e3a
BW
1818 ecobits = I915_READ(GAC_ECO_BITS);
1819 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1820 ECOBITS_PPGTT_CACHE64B);
6197349b 1821
b4a74e3a
BW
1822 gab_ctl = I915_READ(GAB_CTL);
1823 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1824
1825 ecochk = I915_READ(GAM_ECOCHK);
1826 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1827
1828 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
6197349b
BW
1829}
1830
1d2a314c 1831/* PPGTT support for Sandybridge/Gen6 and later */
853ba5d2 1832static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
dd19674b 1833 u64 start, u64 length)
1d2a314c 1834{
e5716f55 1835 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
dd19674b
CW
1836 unsigned int first_entry = start >> PAGE_SHIFT;
1837 unsigned int pde = first_entry / GEN6_PTES;
1838 unsigned int pte = first_entry % GEN6_PTES;
1839 unsigned int num_entries = length >> PAGE_SHIFT;
1840 gen6_pte_t scratch_pte =
1841 vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
1d2a314c 1842
7bddb01f 1843 while (num_entries) {
dd19674b
CW
1844 struct i915_page_table *pt = ppgtt->pd.page_table[pde++];
1845 unsigned int end = min(pte + num_entries, GEN6_PTES);
1846 gen6_pte_t *vaddr;
7bddb01f 1847
dd19674b 1848 num_entries -= end - pte;
1d2a314c 1849
dd19674b
CW
1850 /* Note that the hw doesn't support removing PDEs on the fly
1851 * (they are cached inside the context with no means to
1852 * invalidate the cache), so we can only reset the PTE
1853 * entries back to scratch.
1854 */
1d2a314c 1855
dd19674b
CW
1856 vaddr = kmap_atomic_px(pt);
1857 do {
1858 vaddr[pte++] = scratch_pte;
1859 } while (pte < end);
1860 kunmap_atomic(vaddr);
1d2a314c 1861
dd19674b 1862 pte = 0;
7bddb01f 1863 }
1d2a314c
DV
1864}
1865
853ba5d2 1866static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
4a234c5f 1867 struct i915_vma *vma,
75c7b0b8
CW
1868 enum i915_cache_level cache_level,
1869 u32 flags)
def886c3 1870{
e5716f55 1871 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
4a234c5f 1872 unsigned first_entry = vma->node.start >> PAGE_SHIFT;
07749ef3
MT
1873 unsigned act_pt = first_entry / GEN6_PTES;
1874 unsigned act_pte = first_entry % GEN6_PTES;
b31144c0 1875 const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
5684514b 1876 struct sgt_dma iter = sgt_dma(vma);
b31144c0
CW
1877 gen6_pte_t *vaddr;
1878
9231da70 1879 vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
b31144c0
CW
1880 do {
1881 vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
6e995e23 1882
b31144c0
CW
1883 iter.dma += PAGE_SIZE;
1884 if (iter.dma == iter.max) {
1885 iter.sg = __sg_next(iter.sg);
1886 if (!iter.sg)
1887 break;
6e995e23 1888
b31144c0
CW
1889 iter.dma = sg_dma_address(iter.sg);
1890 iter.max = iter.dma + iter.sg->length;
1891 }
24f3a8cf 1892
07749ef3 1893 if (++act_pte == GEN6_PTES) {
9231da70
CW
1894 kunmap_atomic(vaddr);
1895 vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
6e995e23 1896 act_pte = 0;
def886c3 1897 }
b31144c0 1898 } while (1);
9231da70 1899 kunmap_atomic(vaddr);
d9ec12f8
MA
1900
1901 vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
def886c3
DV
1902}
1903
678d96fb 1904static int gen6_alloc_va_range(struct i915_address_space *vm,
dd19674b 1905 u64 start, u64 length)
678d96fb 1906{
e5716f55 1907 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
ec565b3c 1908 struct i915_page_table *pt;
dd19674b
CW
1909 u64 from = start;
1910 unsigned int pde;
1911 bool flush = false;
4933d519 1912
731f74c5 1913 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
dd19674b
CW
1914 if (pt == vm->scratch_pt) {
1915 pt = alloc_pt(vm);
1916 if (IS_ERR(pt))
1917 goto unwind_out;
4933d519 1918
dd19674b
CW
1919 gen6_initialize_pt(vm, pt);
1920 ppgtt->pd.page_table[pde] = pt;
1921 gen6_write_pde(ppgtt, pde, pt);
1922 flush = true;
4933d519 1923 }
4933d519
MT
1924 }
1925
dd19674b
CW
1926 if (flush) {
1927 mark_tlbs_dirty(ppgtt);
1928 wmb();
678d96fb
BW
1929 }
1930
1931 return 0;
4933d519
MT
1932
1933unwind_out:
dd19674b
CW
1934 gen6_ppgtt_clear_range(vm, from, start);
1935 return -ENOMEM;
678d96fb
BW
1936}
1937
8776f02b
MK
1938static int gen6_init_scratch(struct i915_address_space *vm)
1939{
8bcdd0f7 1940 int ret;
8776f02b 1941
8448661d 1942 ret = setup_scratch_page(vm, I915_GFP_DMA);
8bcdd0f7
CW
1943 if (ret)
1944 return ret;
8776f02b 1945
8448661d 1946 vm->scratch_pt = alloc_pt(vm);
8776f02b 1947 if (IS_ERR(vm->scratch_pt)) {
8448661d 1948 cleanup_scratch_page(vm);
8776f02b
MK
1949 return PTR_ERR(vm->scratch_pt);
1950 }
1951
1952 gen6_initialize_pt(vm, vm->scratch_pt);
1953
1954 return 0;
1955}
1956
1957static void gen6_free_scratch(struct i915_address_space *vm)
1958{
8448661d
CW
1959 free_pt(vm, vm->scratch_pt);
1960 cleanup_scratch_page(vm);
8776f02b
MK
1961}
1962
061dd493 1963static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
a00d825d 1964{
e5716f55 1965 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
731f74c5 1966 struct i915_page_directory *pd = &ppgtt->pd;
09942c65 1967 struct i915_page_table *pt;
75c7b0b8 1968 u32 pde;
4933d519 1969
061dd493
DV
1970 drm_mm_remove_node(&ppgtt->node);
1971
731f74c5 1972 gen6_for_all_pdes(pt, pd, pde)
79ab9370 1973 if (pt != vm->scratch_pt)
8448661d 1974 free_pt(vm, pt);
06fda602 1975
8776f02b 1976 gen6_free_scratch(vm);
3440d265
DV
1977}
1978
b146520f 1979static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
3440d265 1980{
8776f02b 1981 struct i915_address_space *vm = &ppgtt->base;
49d73912 1982 struct drm_i915_private *dev_priv = ppgtt->base.i915;
72e96d64 1983 struct i915_ggtt *ggtt = &dev_priv->ggtt;
b146520f 1984 int ret;
1d2a314c 1985
c8d4c0d6
BW
1986 /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1987 * allocator works in address space sizes, so it's multiplied by page
1988 * size. We allocate at the top of the GTT to avoid fragmentation.
1989 */
72e96d64 1990 BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
4933d519 1991
8776f02b
MK
1992 ret = gen6_init_scratch(vm);
1993 if (ret)
1994 return ret;
4933d519 1995
e007b19d
CW
1996 ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
1997 GEN6_PD_SIZE, GEN6_PD_ALIGN,
1998 I915_COLOR_UNEVICTABLE,
1999 0, ggtt->base.total,
2000 PIN_HIGH);
c8c26622 2001 if (ret)
678d96fb
BW
2002 goto err_out;
2003
72e96d64 2004 if (ppgtt->node.start < ggtt->mappable_end)
c8d4c0d6 2005 DRM_DEBUG("Forced to use aperture for PDEs\n");
1d2a314c 2006
52c126ee
CW
2007 ppgtt->pd.base.ggtt_offset =
2008 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2009
2010 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2011 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2012
c8c26622 2013 return 0;
678d96fb
BW
2014
2015err_out:
8776f02b 2016 gen6_free_scratch(vm);
678d96fb 2017 return ret;
b146520f
BW
2018}
2019
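/*
 * A rough sketch of the sizes involved above, assuming 4 KiB pages and
 * 4-byte gen6 PTEs: each of the 512 PDEs occupies one GGTT PTE slot, so the
 * GEN6_PD_SIZE reservation presumably spans 512 pages of GGTT address
 * space, while the directory itself maps 512 page tables * 1024 PTEs *
 * 4 KiB = 2 GiB of PPGTT address space (the ppgtt->base.total set up in
 * gen6_ppgtt_init() below).
 */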
b146520f
BW
2020static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2021{
2f2cf682 2022 return gen6_ppgtt_allocate_page_directories(ppgtt);
4933d519 2023}
06dc68d6 2024
4933d519 2025static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
75c7b0b8 2026 u64 start, u64 length)
4933d519 2027{
ec565b3c 2028 struct i915_page_table *unused;
75c7b0b8 2029 u32 pde;
1d2a314c 2030
731f74c5 2031 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
79ab9370 2032 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
b146520f
BW
2033}
2034
5c5f6457 2035static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
b146520f 2036{
49d73912 2037 struct drm_i915_private *dev_priv = ppgtt->base.i915;
72e96d64 2038 struct i915_ggtt *ggtt = &dev_priv->ggtt;
b146520f
BW
2039 int ret;
2040
72e96d64 2041 ppgtt->base.pte_encode = ggtt->base.pte_encode;
5db94019 2042 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv))
b146520f 2043 ppgtt->switch_mm = gen6_mm_switch;
772c2a51 2044 else if (IS_HASWELL(dev_priv))
b146520f 2045 ppgtt->switch_mm = hsw_mm_switch;
5db94019 2046 else if (IS_GEN7(dev_priv))
b146520f 2047 ppgtt->switch_mm = gen7_mm_switch;
8eb95204 2048 else
b146520f
BW
2049 BUG();
2050
2051 ret = gen6_ppgtt_alloc(ppgtt);
2052 if (ret)
2053 return ret;
2054
09942c65 2055 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
1d2a314c 2056
5c5f6457 2057 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
16a011c8 2058 gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
678d96fb 2059
52c126ee
CW
2060 ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total);
2061 if (ret) {
2062 gen6_ppgtt_cleanup(&ppgtt->base);
2063 return ret;
2064 }
2065
054b9acd
MK
2066 ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2067 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2068 ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2069 ppgtt->base.bind_vma = ppgtt_bind_vma;
fa3f46af
MA
2070 ppgtt->base.set_pages = ppgtt_set_pages;
2071 ppgtt->base.clear_pages = clear_pages;
054b9acd
MK
2072 ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2073 ppgtt->debug_dump = gen6_dump_ppgtt;
2074
440fd528 2075 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
b146520f
BW
2076 ppgtt->node.size >> 20,
2077 ppgtt->node.start / PAGE_SIZE);
3440d265 2078
52c126ee
CW
2079 DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n",
2080 ppgtt->pd.base.ggtt_offset << 10);
fa76da34 2081
b146520f 2082 return 0;
3440d265
DV
2083}
2084
2bfa996e
CW
2085static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2086 struct drm_i915_private *dev_priv)
3440d265 2087{
49d73912 2088 ppgtt->base.i915 = dev_priv;
8448661d 2089 ppgtt->base.dma = &dev_priv->drm.pdev->dev;
3440d265 2090
2bfa996e 2091 if (INTEL_INFO(dev_priv)->gen < 8)
5c5f6457 2092 return gen6_ppgtt_init(ppgtt);
3ed124b2 2093 else
d7b2633d 2094 return gen8_ppgtt_init(ppgtt);
fa76da34 2095}
c114f76a 2096
a2cad9df 2097static void i915_address_space_init(struct i915_address_space *vm,
80b204bc
CW
2098 struct drm_i915_private *dev_priv,
2099 const char *name)
a2cad9df 2100{
80b204bc 2101 i915_gem_timeline_init(dev_priv, &vm->timeline, name);
47db922f 2102
381b943b 2103 drm_mm_init(&vm->mm, 0, vm->total);
47db922f
CW
2104 vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
2105
a2cad9df
MW
2106 INIT_LIST_HEAD(&vm->active_list);
2107 INIT_LIST_HEAD(&vm->inactive_list);
50e046b6 2108 INIT_LIST_HEAD(&vm->unbound_list);
47db922f 2109
a2cad9df 2110 list_add_tail(&vm->global_link, &dev_priv->vm_list);
8448661d 2111 pagevec_init(&vm->free_pages, false);
a2cad9df
MW
2112}
2113
ed9724dd
MA
2114static void i915_address_space_fini(struct i915_address_space *vm)
2115{
8448661d 2116 if (pagevec_count(&vm->free_pages))
66df1014 2117 vm_free_pages_release(vm, true);
8448661d 2118
ed9724dd
MA
2119 i915_gem_timeline_fini(&vm->timeline);
2120 drm_mm_takedown(&vm->mm);
2121 list_del(&vm->global_link);
2122}
2123
c6be607a 2124static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
d5165ebd 2125{
d5165ebd
TG
2126 /* This function is for GTT-related workarounds. It is called on driver
2127 * load and after a GPU reset, so workarounds can be placed here even if
2128 * they get overwritten by a GPU reset.
2129 */
90007bca 2130 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */
8652744b 2131 if (IS_BROADWELL(dev_priv))
d5165ebd 2132 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
920a14b2 2133 else if (IS_CHERRYVIEW(dev_priv))
d5165ebd 2134 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
90007bca 2135 else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv))
d5165ebd 2136 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
9fb5026f 2137 else if (IS_GEN9_LP(dev_priv))
d5165ebd 2138 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
9a6330cf
MA
2139
2140 /*
2141 * To support 64K PTEs we need to first enable the use of the
2142 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
2143 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
2144 * shouldn't be needed after GEN10.
2145 *
2146 * 64K pages were first introduced from BDW+, although technically they
2147 * only *work* from gen9+. For pre-BDW we instead have the option for
2148 * 32K pages, but we don't currently have any support for it in our
2149 * driver.
2150 */
2151 if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
2152 INTEL_GEN(dev_priv) <= 10)
2153 I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
2154 I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
2155 GAMW_ECO_ENABLE_64K_IPS_FIELD);
d5165ebd
TG
2156}
2157
c6be607a 2158int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
82460d97 2159{
c6be607a 2160 gtt_write_workarounds(dev_priv);
d5165ebd 2161
671b5013
TD
2162 /* In the case of execlists, PPGTT is enabled by the context descriptor
2163 * and the PDPs are contained within the context itself. We don't
2164 * need to do anything here. */
4f044a88 2165 if (i915_modparams.enable_execlists)
671b5013
TD
2166 return 0;
2167
c6be607a 2168 if (!USES_PPGTT(dev_priv))
82460d97
DV
2169 return 0;
2170
5db94019 2171 if (IS_GEN6(dev_priv))
c6be607a 2172 gen6_ppgtt_enable(dev_priv);
5db94019 2173 else if (IS_GEN7(dev_priv))
c6be607a
TU
2174 gen7_ppgtt_enable(dev_priv);
2175 else if (INTEL_GEN(dev_priv) >= 8)
2176 gen8_ppgtt_enable(dev_priv);
82460d97 2177 else
c6be607a 2178 MISSING_CASE(INTEL_GEN(dev_priv));
82460d97 2179
4ad2fd88
JH
2180 return 0;
2181}
1d2a314c 2182
4d884705 2183struct i915_hw_ppgtt *
2bfa996e 2184i915_ppgtt_create(struct drm_i915_private *dev_priv,
80b204bc
CW
2185 struct drm_i915_file_private *fpriv,
2186 const char *name)
4d884705
DV
2187{
2188 struct i915_hw_ppgtt *ppgtt;
2189 int ret;
2190
2191 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2192 if (!ppgtt)
2193 return ERR_PTR(-ENOMEM);
2194
1188bc66 2195 ret = __hw_ppgtt_init(ppgtt, dev_priv);
4d884705
DV
2196 if (ret) {
2197 kfree(ppgtt);
2198 return ERR_PTR(ret);
2199 }
2200
1188bc66
CW
2201 kref_init(&ppgtt->ref);
2202 i915_address_space_init(&ppgtt->base, dev_priv, name);
2203 ppgtt->base.file = fpriv;
2204
198c974d
DCS
2205 trace_i915_ppgtt_create(&ppgtt->base);
2206
4d884705
DV
2207 return ppgtt;
2208}
2209
0c7eeda1
CW
2210void i915_ppgtt_close(struct i915_address_space *vm)
2211{
2212 struct list_head *phases[] = {
2213 &vm->active_list,
2214 &vm->inactive_list,
2215 &vm->unbound_list,
2216 NULL,
2217 }, **phase;
2218
2219 GEM_BUG_ON(vm->closed);
2220 vm->closed = true;
2221
2222 for (phase = phases; *phase; phase++) {
2223 struct i915_vma *vma, *vn;
2224
2225 list_for_each_entry_safe(vma, vn, *phase, vm_link)
2226 if (!i915_vma_is_closed(vma))
2227 i915_vma_close(vma);
2228 }
2229}
2230
ed9724dd 2231void i915_ppgtt_release(struct kref *kref)
ee960be7
DV
2232{
2233 struct i915_hw_ppgtt *ppgtt =
2234 container_of(kref, struct i915_hw_ppgtt, ref);
2235
198c974d
DCS
2236 trace_i915_ppgtt_release(&ppgtt->base);
2237
50e046b6 2238 /* vmas should already be unbound and destroyed */
ee960be7
DV
2239 WARN_ON(!list_empty(&ppgtt->base.active_list));
2240 WARN_ON(!list_empty(&ppgtt->base.inactive_list));
50e046b6 2241 WARN_ON(!list_empty(&ppgtt->base.unbound_list));
ee960be7
DV
2242
2243 ppgtt->base.cleanup(&ppgtt->base);
8448661d 2244 i915_address_space_fini(&ppgtt->base);
ee960be7
DV
2245 kfree(ppgtt);
2246}
1d2a314c 2247
a81cc00c
BW
2248/* Certain Gen5 chipsets require idling the GPU before
2249 * unmapping anything from the GTT when VT-d is enabled.
2250 */
97d6d7ab 2251static bool needs_idle_maps(struct drm_i915_private *dev_priv)
a81cc00c 2252{
a81cc00c
BW
2253 /* Query intel_iommu to see if we need the workaround. Presumably that
2254 * was loaded first.
2255 */
80debff8 2256 return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
a81cc00c
BW
2257}
2258
b03ec3d6 2259static void gen6_check_and_clear_faults(struct drm_i915_private *dev_priv)
828c7908 2260{
e2f80391 2261 struct intel_engine_cs *engine;
3b3f1650 2262 enum intel_engine_id id;
b03ec3d6 2263 u32 fault;
828c7908 2264
3b3f1650 2265 for_each_engine(engine, dev_priv, id) {
b03ec3d6
MT
2266 fault = I915_READ(RING_FAULT_REG(engine));
2267 if (fault & RING_FAULT_VALID) {
828c7908 2268 DRM_DEBUG_DRIVER("Unexpected fault\n"
59a5d290 2269 "\tAddr: 0x%08lx\n"
828c7908
BW
2270 "\tAddress space: %s\n"
2271 "\tSource ID: %d\n"
2272 "\tType: %d\n",
b03ec3d6
MT
2273 fault & PAGE_MASK,
2274 fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2275 RING_FAULT_SRCID(fault),
2276 RING_FAULT_FAULT_TYPE(fault));
e2f80391 2277 I915_WRITE(RING_FAULT_REG(engine),
b03ec3d6 2278 fault & ~RING_FAULT_VALID);
828c7908
BW
2279 }
2280 }
3b3f1650 2281
b03ec3d6
MT
2282 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
2283}
2284
2285static void gen8_check_and_clear_faults(struct drm_i915_private *dev_priv)
2286{
2287 u32 fault = I915_READ(GEN8_RING_FAULT_REG);
2288
2289 if (fault & RING_FAULT_VALID) {
2290 DRM_DEBUG_DRIVER("Unexpected fault\n"
2291 "\tAddr: 0x%08lx\n"
2292 "\tEngine ID: %d\n"
2293 "\tSource ID: %d\n"
2294 "\tType: %d\n",
2295 fault & PAGE_MASK,
2296 GEN8_RING_FAULT_ENGINE_ID(fault),
2297 RING_FAULT_SRCID(fault),
2298 RING_FAULT_FAULT_TYPE(fault));
2299 I915_WRITE(GEN8_RING_FAULT_REG,
2300 fault & ~RING_FAULT_VALID);
2301 }
2302
2303 POSTING_READ(GEN8_RING_FAULT_REG);
2304}
2305
2306void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2307{
2308 /* From GEN8 onwards we only have one 'All Engine Fault Register' */
2309 if (INTEL_GEN(dev_priv) >= 8)
2310 gen8_check_and_clear_faults(dev_priv);
2311 else if (INTEL_GEN(dev_priv) >= 6)
2312 gen6_check_and_clear_faults(dev_priv);
2313 else
2314 return;
828c7908
BW
2315}
2316
275a991c 2317void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
828c7908 2318{
72e96d64 2319 struct i915_ggtt *ggtt = &dev_priv->ggtt;
828c7908
BW
2320
2321 /* Don't bother messing with faults pre GEN6 as we have little
2322 * documentation supporting that it's a good idea.
2323 */
275a991c 2324 if (INTEL_GEN(dev_priv) < 6)
828c7908
BW
2325 return;
2326
dc97997a 2327 i915_check_and_clear_faults(dev_priv);
828c7908 2328
381b943b 2329 ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
91e56499 2330
7c3f86b6 2331 i915_ggtt_invalidate(dev_priv);
828c7908
BW
2332}
2333
03ac84f1
CW
2334int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
2335 struct sg_table *pages)
7c2e6fdf 2336{
1a292fa5
CW
2337 do {
2338 if (dma_map_sg(&obj->base.dev->pdev->dev,
2339 pages->sgl, pages->nents,
2340 PCI_DMA_BIDIRECTIONAL))
2341 return 0;
2342
2343 /* If the DMA remap fails, one cause can be that we have
2344 * too many objects pinned in a small remapping table,
2345 * such as swiotlb. Incrementally purge all other objects and
2346 * try again - if there are no more pages to remove from
2347 * the DMA remapper, i915_gem_shrink will return 0.
2348 */
2349 GEM_BUG_ON(obj->mm.pages == pages);
2350 } while (i915_gem_shrink(to_i915(obj->base.dev),
912d572d 2351 obj->base.size >> PAGE_SHIFT, NULL,
1a292fa5
CW
2352 I915_SHRINK_BOUND |
2353 I915_SHRINK_UNBOUND |
2354 I915_SHRINK_ACTIVE));
9da3da66 2355
03ac84f1 2356 return -ENOSPC;
7c2e6fdf
DV
2357}
2358
2c642b07 2359static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
94ec8f61 2360{
94ec8f61 2361 writeq(pte, addr);
94ec8f61
BW
2362}
2363
d6473f56
CW
2364static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2365 dma_addr_t addr,
75c7b0b8 2366 u64 offset,
d6473f56
CW
2367 enum i915_cache_level level,
2368 u32 unused)
2369{
7c3f86b6 2370 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
d6473f56 2371 gen8_pte_t __iomem *pte =
7c3f86b6 2372 (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
d6473f56 2373
4fb84d99 2374 gen8_set_pte(pte, gen8_pte_encode(addr, level));
d6473f56 2375
7c3f86b6 2376 ggtt->invalidate(vm->i915);
d6473f56
CW
2377}
2378
94ec8f61 2379static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
4a234c5f 2380 struct i915_vma *vma,
75c7b0b8
CW
2381 enum i915_cache_level level,
2382 u32 unused)
94ec8f61 2383{
ce7fda2e 2384 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
85d1225e
DG
2385 struct sgt_iter sgt_iter;
2386 gen8_pte_t __iomem *gtt_entries;
894ccebe 2387 const gen8_pte_t pte_encode = gen8_pte_encode(0, level);
85d1225e 2388 dma_addr_t addr;
be69459a 2389
894ccebe 2390 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
4a234c5f
MA
2391 gtt_entries += vma->node.start >> PAGE_SHIFT;
2392 for_each_sgt_dma(addr, sgt_iter, vma->pages)
894ccebe 2393 gen8_set_pte(gtt_entries++, pte_encode | addr);
85d1225e 2394
894ccebe 2395 wmb();
94ec8f61 2396
94ec8f61
BW
2397 /* This next bit makes the above posting read even more important. We
2398 * want to flush the TLBs only after we're certain all the PTE updates
2399 * have finished.
2400 */
7c3f86b6 2401 ggtt->invalidate(vm->i915);
94ec8f61
BW
2402}
2403
d6473f56
CW
2404static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2405 dma_addr_t addr,
75c7b0b8 2406 u64 offset,
d6473f56
CW
2407 enum i915_cache_level level,
2408 u32 flags)
2409{
7c3f86b6 2410 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
d6473f56 2411 gen6_pte_t __iomem *pte =
7c3f86b6 2412 (gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
d6473f56 2413
4fb84d99 2414 iowrite32(vm->pte_encode(addr, level, flags), pte);
d6473f56 2415
7c3f86b6 2416 ggtt->invalidate(vm->i915);
d6473f56
CW
2417}
2418
e76e9aeb
BW
2419/*
2420 * Binds an object into the global gtt with the specified cache level. The object
2421 * will be accessible to the GPU via commands whose operands reference offsets
2422 * within the global GTT as well as accessible by the CPU through the GMADR
2423 * mapped BAR (dev_priv->mm.gtt->gtt).
2424 */
853ba5d2 2425static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
4a234c5f 2426 struct i915_vma *vma,
75c7b0b8
CW
2427 enum i915_cache_level level,
2428 u32 flags)
e76e9aeb 2429{
ce7fda2e 2430 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
b31144c0 2431 gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
4a234c5f 2432 unsigned int i = vma->node.start >> PAGE_SHIFT;
b31144c0 2433 struct sgt_iter iter;
85d1225e 2434 dma_addr_t addr;
4a234c5f 2435 for_each_sgt_dma(addr, iter, vma->pages)
b31144c0
CW
2436 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2437 wmb();
0f9b91c7
BW
2438
2439 /* This next bit makes the above posting read even more important. We
2440 * want to flush the TLBs only after we're certain all the PTE updates
2441 * have finished.
2442 */
7c3f86b6 2443 ggtt->invalidate(vm->i915);
e76e9aeb
BW
2444}
2445
f7770bfd 2446static void nop_clear_range(struct i915_address_space *vm,
75c7b0b8 2447 u64 start, u64 length)
f7770bfd
CW
2448{
2449}
2450
94ec8f61 2451static void gen8_ggtt_clear_range(struct i915_address_space *vm,
75c7b0b8 2452 u64 start, u64 length)
94ec8f61 2453{
ce7fda2e 2454 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
782f1495
BW
2455 unsigned first_entry = start >> PAGE_SHIFT;
2456 unsigned num_entries = length >> PAGE_SHIFT;
894ccebe
CW
2457 const gen8_pte_t scratch_pte =
2458 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
2459 gen8_pte_t __iomem *gtt_base =
72e96d64
JL
2460 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2461 const int max_entries = ggtt_total_entries(ggtt) - first_entry;
94ec8f61
BW
2462 int i;
2463
2464 if (WARN(num_entries > max_entries,
2465 "First entry = %d; Num entries = %d (max=%d)\n",
2466 first_entry, num_entries, max_entries))
2467 num_entries = max_entries;
2468
94ec8f61
BW
2469 for (i = 0; i < num_entries; i++)
2470 gen8_set_pte(&gtt_base[i], scratch_pte);
94ec8f61
BW
2471}
2472
0ef34ad6
JB
2473static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2474{
2475 struct drm_i915_private *dev_priv = vm->i915;
2476
2477 /*
2478 * Make sure the internal GAM fifo has been cleared of all GTT
2479 * writes before exiting stop_machine(). This guarantees that
2480 * any aperture accesses waiting to start in another process
2481 * cannot back up behind the GTT writes causing a hang.
2482 * The register can be any arbitrary GAM register.
2483 */
2484 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2485}
2486
2487struct insert_page {
2488 struct i915_address_space *vm;
2489 dma_addr_t addr;
2490 u64 offset;
2491 enum i915_cache_level level;
2492};
2493
2494static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2495{
2496 struct insert_page *arg = _arg;
2497
2498 gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2499 bxt_vtd_ggtt_wa(arg->vm);
2500
2501 return 0;
2502}
2503
2504static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2505 dma_addr_t addr,
2506 u64 offset,
2507 enum i915_cache_level level,
2508 u32 unused)
2509{
2510 struct insert_page arg = { vm, addr, offset, level };
2511
2512 stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2513}
2514
2515struct insert_entries {
2516 struct i915_address_space *vm;
4a234c5f 2517 struct i915_vma *vma;
0ef34ad6
JB
2518 enum i915_cache_level level;
2519};
2520
2521static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2522{
2523 struct insert_entries *arg = _arg;
2524
4a234c5f 2525 gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0);
0ef34ad6
JB
2526 bxt_vtd_ggtt_wa(arg->vm);
2527
2528 return 0;
2529}
2530
2531static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
4a234c5f 2532 struct i915_vma *vma,
0ef34ad6
JB
2533 enum i915_cache_level level,
2534 u32 unused)
2535{
17369ba0 2536 struct insert_entries arg = { vm, vma, level };
0ef34ad6
JB
2537
2538 stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2539}
2540
2541struct clear_range {
2542 struct i915_address_space *vm;
2543 u64 start;
2544 u64 length;
2545};
2546
2547static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2548{
2549 struct clear_range *arg = _arg;
2550
2551 gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2552 bxt_vtd_ggtt_wa(arg->vm);
2553
2554 return 0;
2555}
2556
2557static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2558 u64 start,
2559 u64 length)
2560{
2561 struct clear_range arg = { vm, start, length };
2562
2563 stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2564}
2565
853ba5d2 2566static void gen6_ggtt_clear_range(struct i915_address_space *vm,
75c7b0b8 2567 u64 start, u64 length)
7faf1ab2 2568{
ce7fda2e 2569 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
782f1495
BW
2570 unsigned first_entry = start >> PAGE_SHIFT;
2571 unsigned num_entries = length >> PAGE_SHIFT;
07749ef3 2572 gen6_pte_t scratch_pte, __iomem *gtt_base =
72e96d64
JL
2573 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2574 const int max_entries = ggtt_total_entries(ggtt) - first_entry;
7faf1ab2
DV
2575 int i;
2576
2577 if (WARN(num_entries > max_entries,
2578 "First entry = %d; Num entries = %d (max=%d)\n",
2579 first_entry, num_entries, max_entries))
2580 num_entries = max_entries;
2581
8bcdd0f7 2582 scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
4fb84d99 2583 I915_CACHE_LLC, 0);
828c7908 2584
7faf1ab2
DV
2585 for (i = 0; i < num_entries; i++)
2586 iowrite32(scratch_pte, &gtt_base[i]);
7faf1ab2
DV
2587}
2588
d6473f56
CW
2589static void i915_ggtt_insert_page(struct i915_address_space *vm,
2590 dma_addr_t addr,
75c7b0b8 2591 u64 offset,
d6473f56
CW
2592 enum i915_cache_level cache_level,
2593 u32 unused)
2594{
d6473f56
CW
2595 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2596 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
d6473f56
CW
2597
2598 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
d6473f56
CW
2599}
2600
d369d2d9 2601static void i915_ggtt_insert_entries(struct i915_address_space *vm,
4a234c5f 2602 struct i915_vma *vma,
75c7b0b8
CW
2603 enum i915_cache_level cache_level,
2604 u32 unused)
7faf1ab2
DV
2605{
2606 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2607 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2608
4a234c5f
MA
2609 intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
2610 flags);
7faf1ab2
DV
2611}
2612
853ba5d2 2613static void i915_ggtt_clear_range(struct i915_address_space *vm,
75c7b0b8 2614 u64 start, u64 length)
7faf1ab2 2615{
2eedfc7d 2616 intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
7faf1ab2
DV
2617}
2618
70b9f6f8
DV
2619static int ggtt_bind_vma(struct i915_vma *vma,
2620 enum i915_cache_level cache_level,
2621 u32 flags)
0a878716 2622{
49d73912 2623 struct drm_i915_private *i915 = vma->vm->i915;
0a878716 2624 struct drm_i915_gem_object *obj = vma->obj;
ba7a5741 2625 u32 pte_flags;
0a878716 2626
0a878716 2627 /* Currently applicable only to VLV */
ba7a5741 2628 pte_flags = 0;
0a878716
DV
2629 if (obj->gt_ro)
2630 pte_flags |= PTE_READ_ONLY;
2631
9c870d03 2632 intel_runtime_pm_get(i915);
4a234c5f 2633 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
9c870d03 2634 intel_runtime_pm_put(i915);
0a878716 2635
d9ec12f8
MA
2636 vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
2637
0a878716
DV
2638 /*
2639 * Without aliasing PPGTT there's no difference between
2640 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2641 * upgrade to both bound if we bind either to avoid double-binding.
2642 */
3272db53 2643 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
0a878716
DV
2644
2645 return 0;
2646}
2647
cbc4e9e6
CW
2648static void ggtt_unbind_vma(struct i915_vma *vma)
2649{
2650 struct drm_i915_private *i915 = vma->vm->i915;
2651
2652 intel_runtime_pm_get(i915);
2653 vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2654 intel_runtime_pm_put(i915);
2655}
2656
0a878716
DV
2657static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2658 enum i915_cache_level cache_level,
2659 u32 flags)
d5bd1449 2660{
49d73912 2661 struct drm_i915_private *i915 = vma->vm->i915;
321d178e 2662 u32 pte_flags;
ff685975 2663 int ret;
70b9f6f8 2664
24f3a8cf 2665 /* Currently applicable only to VLV */
321d178e
CW
2666 pte_flags = 0;
2667 if (vma->obj->gt_ro)
f329f5f6 2668 pte_flags |= PTE_READ_ONLY;
24f3a8cf 2669
ff685975
CW
2670 if (flags & I915_VMA_LOCAL_BIND) {
2671 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2672
1f23475c
MA
2673 if (!(vma->flags & I915_VMA_LOCAL_BIND) &&
2674 appgtt->base.allocate_va_range) {
ff685975
CW
2675 ret = appgtt->base.allocate_va_range(&appgtt->base,
2676 vma->node.start,
d567232c 2677 vma->size);
ff685975 2678 if (ret)
fa3f46af 2679 return ret;
ff685975
CW
2680 }
2681
4a234c5f
MA
2682 appgtt->base.insert_entries(&appgtt->base, vma, cache_level,
2683 pte_flags);
ff685975
CW
2684 }
2685
3272db53 2686 if (flags & I915_VMA_GLOBAL_BIND) {
9c870d03 2687 intel_runtime_pm_get(i915);
4a234c5f 2688 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
9c870d03 2689 intel_runtime_pm_put(i915);
6f65e29a 2690 }
d5bd1449 2691
70b9f6f8 2692 return 0;
d5bd1449
CW
2693}
2694
cbc4e9e6 2695static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
74163907 2696{
49d73912 2697 struct drm_i915_private *i915 = vma->vm->i915;
6f65e29a 2698
9c870d03
CW
2699 if (vma->flags & I915_VMA_GLOBAL_BIND) {
2700 intel_runtime_pm_get(i915);
cbc4e9e6 2701 vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
9c870d03
CW
2702 intel_runtime_pm_put(i915);
2703 }
06615ee5 2704
cbc4e9e6
CW
2705 if (vma->flags & I915_VMA_LOCAL_BIND) {
2706 struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base;
2707
2708 vm->clear_range(vm, vma->node.start, vma->size);
2709 }
74163907
DV
2710}
2711
03ac84f1
CW
2712void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
2713 struct sg_table *pages)
7c2e6fdf 2714{
52a05c30
DW
2715 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2716 struct device *kdev = &dev_priv->drm.pdev->dev;
307dc25b 2717 struct i915_ggtt *ggtt = &dev_priv->ggtt;
5c042287 2718
307dc25b 2719 if (unlikely(ggtt->do_idle_maps)) {
228ec87c 2720 if (i915_gem_wait_for_idle(dev_priv, 0)) {
307dc25b
CW
2721 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2722 /* Wait a bit, in hopes it avoids the hang */
2723 udelay(10);
2724 }
2725 }
5c042287 2726
03ac84f1 2727 dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
7c2e6fdf 2728}
644ec02b 2729
fa3f46af
MA
2730static int ggtt_set_pages(struct i915_vma *vma)
2731{
2732 int ret;
2733
2734 GEM_BUG_ON(vma->pages);
2735
2736 ret = i915_get_ggtt_vma_pages(vma);
2737 if (ret)
2738 return ret;
2739
7464284b
MA
2740 vma->page_sizes = vma->obj->mm.page_sizes;
2741
fa3f46af
MA
2742 return 0;
2743}
2744
45b186f1 2745static void i915_gtt_color_adjust(const struct drm_mm_node *node,
42d6ab48 2746 unsigned long color,
440fd528
TR
2747 u64 *start,
2748 u64 *end)
42d6ab48 2749{
a6508ded 2750 if (node->allocated && node->color != color)
f51455d4 2751 *start += I915_GTT_PAGE_SIZE;
42d6ab48 2752
a6508ded
CW
2753 /* Also leave a space between the unallocated reserved node after the
2754 * GTT and any objects within the GTT, i.e. we use the color adjustment
2755 * to insert a guard page to prevent prefetches crossing over the
2756 * GTT boundary.
2757 */
b44f97fd 2758 node = list_next_entry(node, node_list);
a6508ded 2759 if (node->color != color)
f51455d4 2760 *end -= I915_GTT_PAGE_SIZE;
42d6ab48 2761}
fbe5d36e 2762
6cde9a02
CW
2763int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
2764{
2765 struct i915_ggtt *ggtt = &i915->ggtt;
2766 struct i915_hw_ppgtt *ppgtt;
2767 int err;
2768
57202f47 2769 ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]");
1188bc66
CW
2770 if (IS_ERR(ppgtt))
2771 return PTR_ERR(ppgtt);
6cde9a02 2772
e565ceb0
CW
2773 if (WARN_ON(ppgtt->base.total < ggtt->base.total)) {
2774 err = -ENODEV;
2775 goto err_ppgtt;
2776 }
2777
6cde9a02 2778 if (ppgtt->base.allocate_va_range) {
e565ceb0
CW
2779 /* Note we only pre-allocate as far as the end of the global
2780 * GTT. On 48b / 4-level page-tables, the difference is very,
2781 * very significant! We have to preallocate as GVT/vgpu does
2782 * not like the page directory disappearing.
2783 */
6cde9a02 2784 err = ppgtt->base.allocate_va_range(&ppgtt->base,
e565ceb0 2785 0, ggtt->base.total);
6cde9a02 2786 if (err)
1188bc66 2787 goto err_ppgtt;
6cde9a02
CW
2788 }
2789
6cde9a02 2790 i915->mm.aliasing_ppgtt = ppgtt;
cbc4e9e6 2791
6cde9a02
CW
2792 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2793 ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2794
cbc4e9e6
CW
2795 WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma);
2796 ggtt->base.unbind_vma = aliasing_gtt_unbind_vma;
2797
6cde9a02
CW
2798 return 0;
2799
6cde9a02 2800err_ppgtt:
1188bc66 2801 i915_ppgtt_put(ppgtt);
6cde9a02
CW
2802 return err;
2803}
2804
2805void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
2806{
2807 struct i915_ggtt *ggtt = &i915->ggtt;
2808 struct i915_hw_ppgtt *ppgtt;
2809
2810 ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2811 if (!ppgtt)
2812 return;
2813
1188bc66 2814 i915_ppgtt_put(ppgtt);
6cde9a02
CW
2815
2816 ggtt->base.bind_vma = ggtt_bind_vma;
cbc4e9e6 2817 ggtt->base.unbind_vma = ggtt_unbind_vma;
6cde9a02
CW
2818}
2819
f6b9d5ca 2820int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
644ec02b 2821{
e78891ca
BW
2822 /* Let GEM manage all of the aperture.
2823 *
2824 * However, leave one page at the end still bound to the scratch page.
2825 * There are a number of places where the hardware apparently prefetches
2826 * past the end of the object, and we've seen multiple hangs with the
2827 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2828 * aperture. One page should be enough to keep any prefetching inside
2829 * of the aperture.
2830 */
72e96d64 2831 struct i915_ggtt *ggtt = &dev_priv->ggtt;
ed2f3452 2832 unsigned long hole_start, hole_end;
f6b9d5ca 2833 struct drm_mm_node *entry;
fa76da34 2834 int ret;
644ec02b 2835
b02d22a3
ZW
2836 ret = intel_vgt_balloon(dev_priv);
2837 if (ret)
2838 return ret;
5dda8fa3 2839
95374d75 2840 /* Reserve a mappable slot for our lockless error capture */
4e64e553
CW
2841 ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture,
2842 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2843 0, ggtt->mappable_end,
2844 DRM_MM_INSERT_LOW);
95374d75
CW
2845 if (ret)
2846 return ret;
2847
ed2f3452 2848 /* Clear any non-preallocated blocks */
72e96d64 2849 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
ed2f3452
CW
2850 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2851 hole_start, hole_end);
72e96d64 2852 ggtt->base.clear_range(&ggtt->base, hole_start,
4fb84d99 2853 hole_end - hole_start);
ed2f3452
CW
2854 }
2855
2856 /* And finally clear the reserved guard page */
f6b9d5ca 2857 ggtt->base.clear_range(&ggtt->base,
4fb84d99 2858 ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
6c5566a8 2859
97d6d7ab 2860 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
6cde9a02 2861 ret = i915_gem_init_aliasing_ppgtt(dev_priv);
95374d75 2862 if (ret)
6cde9a02 2863 goto err;
fa76da34
DV
2864 }
2865
6c5566a8 2866 return 0;
95374d75 2867
95374d75
CW
2868err:
2869 drm_mm_remove_node(&ggtt->error_capture);
2870 return ret;
e76e9aeb
BW
2871}
2872
d85489d3
JL
2873/**
2874 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
97d6d7ab 2875 * @dev_priv: i915 device
d85489d3 2876 */
97d6d7ab 2877void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
90d0a0e8 2878{
72e96d64 2879 struct i915_ggtt *ggtt = &dev_priv->ggtt;
94d4a2a9 2880 struct i915_vma *vma, *vn;
66df1014 2881 struct pagevec *pvec;
94d4a2a9
CW
2882
2883 ggtt->base.closed = true;
2884
2885 mutex_lock(&dev_priv->drm.struct_mutex);
2886 WARN_ON(!list_empty(&ggtt->base.active_list));
2887 list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link)
2888 WARN_ON(i915_vma_unbind(vma));
2889 mutex_unlock(&dev_priv->drm.struct_mutex);
90d0a0e8 2890
97d6d7ab 2891 i915_gem_cleanup_stolen(&dev_priv->drm);
a4eba47b 2892
1188bc66
CW
2893 mutex_lock(&dev_priv->drm.struct_mutex);
2894 i915_gem_fini_aliasing_ppgtt(dev_priv);
2895
95374d75
CW
2896 if (drm_mm_node_allocated(&ggtt->error_capture))
2897 drm_mm_remove_node(&ggtt->error_capture);
2898
72e96d64 2899 if (drm_mm_initialized(&ggtt->base.mm)) {
b02d22a3 2900 intel_vgt_deballoon(dev_priv);
ed9724dd 2901 i915_address_space_fini(&ggtt->base);
90d0a0e8
DV
2902 }
2903
72e96d64 2904 ggtt->base.cleanup(&ggtt->base);
66df1014
CW
2905
2906 pvec = &dev_priv->mm.wc_stash;
2907 if (pvec->nr) {
2908 set_pages_array_wb(pvec->pages, pvec->nr);
2909 __pagevec_release(pvec);
2910 }
2911
1188bc66 2912 mutex_unlock(&dev_priv->drm.struct_mutex);
f6b9d5ca
CW
2913
2914 arch_phys_wc_del(ggtt->mtrr);
f7bbe788 2915 io_mapping_fini(&ggtt->mappable);
90d0a0e8 2916}
70e32544 2917
2c642b07 2918static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
e76e9aeb
BW
2919{
2920 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2921 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2922 return snb_gmch_ctl << 20;
2923}
2924
2c642b07 2925static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
9459d252
BW
2926{
2927 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2928 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2929 if (bdw_gmch_ctl)
2930 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
562d55d9
BW
2931
2932#ifdef CONFIG_X86_32
2933 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2934 if (bdw_gmch_ctl > 4)
2935 bdw_gmch_ctl = 4;
2936#endif
2937
9459d252
BW
2938 return bdw_gmch_ctl << 20;
2939}
2940
2c642b07 2941static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
d7f25f23
DL
2942{
2943 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2944 gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2945
2946 if (gmch_ctrl)
2947 return 1 << (20 + gmch_ctrl);
2948
2949 return 0;
2950}
2951
2c642b07 2952static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
e76e9aeb
BW
2953{
2954 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2955 snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
a92d1a91 2956 return (size_t)snb_gmch_ctl << 25; /* 32 MB units */
e76e9aeb
BW
2957}
2958
2c642b07 2959static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
9459d252
BW
2960{
2961 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2962 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
a92d1a91 2963 return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */
9459d252
BW
2964}
2965
d7f25f23
DL
2966static size_t chv_get_stolen_size(u16 gmch_ctrl)
2967{
2968 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2969 gmch_ctrl &= SNB_GMCH_GMS_MASK;
2970
2971 /*
2972 * 0x0 to 0x10: 32MB increments starting at 0MB
2973 * 0x11 to 0x16: 4MB increments starting at 8MB
2974 * 0x17 to 0x1d: 4MB increments starting at 36MB
2975 */
2976 if (gmch_ctrl < 0x11)
a92d1a91 2977 return (size_t)gmch_ctrl << 25;
d7f25f23 2978 else if (gmch_ctrl < 0x17)
a92d1a91 2979 return (size_t)(gmch_ctrl - 0x11 + 2) << 22;
d7f25f23 2980 else
a92d1a91 2981 return (size_t)(gmch_ctrl - 0x17 + 9) << 22;
d7f25f23
DL
2982}
2983
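/*
 * Worked example of the CHV decode above: a GMS field of 0x13 falls in the
 * second range, giving (0x13 - 0x11 + 2) << 22 = 4 * 4 MiB = 16 MiB, which
 * matches "4MB increments starting at 8MB" (0x11 -> 8 MiB, 0x12 -> 12 MiB,
 * 0x13 -> 16 MiB). Values below 0x11 are plain 32 MiB units (gmch_ctrl << 25).
 */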
66375014
DL
2984static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2985{
2986 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2987 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2988
2989 if (gen9_gmch_ctl < 0xf0)
a92d1a91 2990 return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */
66375014
DL
2991 else
2992 /* 4MB increments starting at 0xf0 for 4MB */
a92d1a91 2993 return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22;
66375014
DL
2994}
2995
34c998b4 2996static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
63340133 2997{
49d73912
CW
2998 struct drm_i915_private *dev_priv = ggtt->base.i915;
2999 struct pci_dev *pdev = dev_priv->drm.pdev;
34c998b4 3000 phys_addr_t phys_addr;
8bcdd0f7 3001 int ret;
63340133
BW
3002
3003 /* For Modern GENs the PTEs and register space are split in the BAR */
34c998b4 3004 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
63340133 3005
2a073f89 3006 /*
385db982
RV
3007 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
3008 * will be dropped. For WC mappings in general we have 64 byte burst
3009 * writes when the WC buffer is flushed, so we can't use it, but have to
2a073f89
ID
3010 * resort to an uncached mapping. The WC issue is easily caught by the
3011 * readback check when writing GTT PTE entries.
3012 */
385db982 3013 if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
34c998b4 3014 ggtt->gsm = ioremap_nocache(phys_addr, size);
2a073f89 3015 else
34c998b4 3016 ggtt->gsm = ioremap_wc(phys_addr, size);
72e96d64 3017 if (!ggtt->gsm) {
34c998b4 3018 DRM_ERROR("Failed to map the ggtt page table\n");
63340133
BW
3019 return -ENOMEM;
3020 }
3021
8448661d 3022 ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
8bcdd0f7 3023 if (ret) {
63340133
BW
3024 DRM_ERROR("Scratch setup failed\n");
3025 /* iounmap will also get called at remove, but meh */
72e96d64 3026 iounmap(ggtt->gsm);
8bcdd0f7 3027 return ret;
63340133
BW
3028 }
3029
4ad2af1e 3030 return 0;
63340133
BW
3031}
3032
4395890a
ZW
3033static struct intel_ppat_entry *
3034__alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
4e34935f 3035{
4395890a
ZW
3036 struct intel_ppat_entry *entry = &ppat->entries[index];
3037
3038 GEM_BUG_ON(index >= ppat->max_entries);
3039 GEM_BUG_ON(test_bit(index, ppat->used));
3040
3041 entry->ppat = ppat;
3042 entry->value = value;
3043 kref_init(&entry->ref);
3044 set_bit(index, ppat->used);
3045 set_bit(index, ppat->dirty);
3046
3047 return entry;
3048}
3049
3050static void __free_ppat_entry(struct intel_ppat_entry *entry)
3051{
3052 struct intel_ppat *ppat = entry->ppat;
3053 unsigned int index = entry - ppat->entries;
3054
3055 GEM_BUG_ON(index >= ppat->max_entries);
3056 GEM_BUG_ON(!test_bit(index, ppat->used));
3057
3058 entry->value = ppat->clear_value;
3059 clear_bit(index, ppat->used);
3060 set_bit(index, ppat->dirty);
3061}
3062
3063/**
3064 * intel_ppat_get - get a usable PPAT entry
3065 * @i915: i915 device instance
3066 * @value: the PPAT value required by the caller
3067 *
3068 * The function searches for an existing PPAT entry that matches the
3069 * required value. If a perfect match is found, the existing PPAT entry is
3070 * used. If only a partial match is found, it checks whether any PPAT
3071 * index is still available. If so, it allocates a new PPAT index for the
3072 * required value and updates the HW. If not, the partially matched entry
3073 * is used.
3074 */
3075const struct intel_ppat_entry *
3076intel_ppat_get(struct drm_i915_private *i915, u8 value)
3077{
3078 struct intel_ppat *ppat = &i915->ppat;
3079 struct intel_ppat_entry *entry;
3080 unsigned int scanned, best_score;
3081 int i;
3082
3083 GEM_BUG_ON(!ppat->max_entries);
3084
3085 scanned = best_score = 0;
3086 for_each_set_bit(i, ppat->used, ppat->max_entries) {
3087 unsigned int score;
3088
3089 score = ppat->match(ppat->entries[i].value, value);
3090 if (score > best_score) {
3091 entry = &ppat->entries[i];
3092 if (score == INTEL_PPAT_PERFECT_MATCH) {
3093 kref_get(&entry->ref);
3094 return entry;
3095 }
3096 best_score = score;
3097 }
3098 scanned++;
3099 }
3100
3101 if (scanned == ppat->max_entries) {
3102 if (!best_score)
3103 return ERR_PTR(-ENOSPC);
3104
3105 kref_get(&entry->ref);
3106 return entry;
3107 }
3108
3109 i = find_first_zero_bit(ppat->used, ppat->max_entries);
3110 entry = __alloc_ppat_entry(ppat, i, value);
3111 ppat->update_hw(i915);
3112 return entry;
3113}
3114
3115static void release_ppat(struct kref *kref)
3116{
3117 struct intel_ppat_entry *entry =
3118 container_of(kref, struct intel_ppat_entry, ref);
3119 struct drm_i915_private *i915 = entry->ppat->i915;
3120
3121 __free_ppat_entry(entry);
3122 entry->ppat->update_hw(i915);
3123}
3124
3125/**
3126 * intel_ppat_put - put back the PPAT entry obtained from intel_ppat_get()
3127 * @entry: an intel PPAT entry
3128 *
3129 * Put back a PPAT entry obtained from intel_ppat_get(). If the PPAT index of
3130 * the entry is dynamically allocated, its reference count will be decreased.
3131 * Once the reference count drops to zero, the PPAT index becomes free again.
3132 */
3133void intel_ppat_put(const struct intel_ppat_entry *entry)
3134{
3135 struct intel_ppat *ppat = entry->ppat;
3136 unsigned int index = entry - ppat->entries;
3137
3138 GEM_BUG_ON(!ppat->max_entries);
3139
3140 kref_put(&ppat->entries[index].ref, release_ppat);
3141}
3142
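/*
 * A minimal usage sketch for intel_ppat_get()/intel_ppat_put() above,
 * illustrative only (the PPAT value is an arbitrary example, not taken from
 * a real caller):
 *
 *	const struct intel_ppat_entry *entry;
 *
 *	entry = intel_ppat_get(i915, GEN8_PPAT_WB | GEN8_PPAT_LLC);
 *	if (IS_ERR(entry))
 *		return PTR_ERR(entry);
 *
 *	... use entry->value, or entry - entry->ppat->entries as the PPAT index ...
 *
 *	intel_ppat_put(entry);
 */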
3143static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3144{
3145 struct intel_ppat *ppat = &dev_priv->ppat;
3146 int i;
3147
3148 for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3149 I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3150 clear_bit(i, ppat->dirty);
3151 }
3152}
3153
3154static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3155{
3156 struct intel_ppat *ppat = &dev_priv->ppat;
3157 u64 pat = 0;
3158 int i;
3159
3160 for (i = 0; i < ppat->max_entries; i++)
3161 pat |= GEN8_PPAT(i, ppat->entries[i].value);
3162
3163 bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3164
3165 I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3166 I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3167}
3168
3169static unsigned int bdw_private_pat_match(u8 src, u8 dst)
3170{
3171 unsigned int score = 0;
3172 enum {
3173 AGE_MATCH = BIT(0),
3174 TC_MATCH = BIT(1),
3175 CA_MATCH = BIT(2),
3176 };
3177
3178 /* Cache attribute has to be matched. */
1298d51c 3179 if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
4395890a
ZW
3180 return 0;
3181
3182 score |= CA_MATCH;
3183
3184 if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
3185 score |= TC_MATCH;
3186
3187 if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
3188 score |= AGE_MATCH;
3189
3190 if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
3191 return INTEL_PPAT_PERFECT_MATCH;
3192
3193 return score;
3194}
3195
3196static unsigned int chv_private_pat_match(u8 src, u8 dst)
3197{
3198 return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
3199 INTEL_PPAT_PERFECT_MATCH : 0;
3200}
3201
3202static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3203{
3204 ppat->max_entries = 8;
3205 ppat->update_hw = cnl_private_pat_update_hw;
3206 ppat->match = bdw_private_pat_match;
3207 ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3208
4395890a
ZW
3209 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3210 __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3211 __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3212 __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3213 __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3214 __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3215 __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3216 __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
4e34935f
RV
3217}
3218
fbe5d36e
BW
3219/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3220 * bits. When using advanced contexts each context stores its own PAT, but
3221 * writing this data shouldn't be harmful even in those cases. */
4395890a 3222static void bdw_setup_private_ppat(struct intel_ppat *ppat)
fbe5d36e 3223{
4395890a
ZW
3224 ppat->max_entries = 8;
3225 ppat->update_hw = bdw_private_pat_update_hw;
3226 ppat->match = bdw_private_pat_match;
3227 ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
fbe5d36e 3228
4395890a 3229 if (!USES_PPGTT(ppat->i915)) {
d6a8b72e
RV
3230 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3231 * so RTL will always use the value corresponding to
3232 * pat_sel = 000".
3233 * So let's disable cache for GGTT to avoid screen corruptions.
3234 * MOCS still can be used though.
3235 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3236 * before this patch, i.e. the same uncached + snooping access
3237 * like on gen6/7 seems to be in effect.
3238 * - So this just fixes blitter/render access. Again it looks
3239 * like it's not just uncached access, but uncached + snooping.
3240 * So we can still hold onto all our assumptions wrt cpu
3241 * clflushing on LLC machines.
3242 */
4395890a
ZW
3243 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3244 return;
3245 }
d6a8b72e 3246
4395890a
ZW
3247 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */
3248 __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? */
3249 __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */
3250 __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */
3251 __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3252 __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3253 __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3254 __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
fbe5d36e
BW
3255}
3256
4395890a 3257static void chv_setup_private_ppat(struct intel_ppat *ppat)
ee0ce478 3258{
4395890a
ZW
3259 ppat->max_entries = 8;
3260 ppat->update_hw = bdw_private_pat_update_hw;
3261 ppat->match = chv_private_pat_match;
3262 ppat->clear_value = CHV_PPAT_SNOOP;
ee0ce478
VS
3263
3264 /*
3265 * Map WB on BDW to snooped on CHV.
3266 *
3267 * Only the snoop bit has meaning for CHV, the rest is
3268 * ignored.
3269 *
cf3d262e
VS
3270 * The hardware will never snoop for certain types of accesses:
3271 * - CPU GTT (GMADR->GGTT->no snoop->memory)
3272 * - PPGTT page tables
3273 * - some other special cycles
3274 *
3275 * As with BDW, we also need to consider the following for GT accesses:
3276 * "For GGTT, there is NO pat_sel[2:0] from the entry,
3277 * so RTL will always use the value corresponding to
3278 * pat_sel = 000".
3279 * Which means we must set the snoop bit in PAT entry 0
3280 * in order to keep the global status page working.
ee0ce478 3281 */
ee0ce478 3282
4395890a
ZW
3283 __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3284 __alloc_ppat_entry(ppat, 1, 0);
3285 __alloc_ppat_entry(ppat, 2, 0);
3286 __alloc_ppat_entry(ppat, 3, 0);
3287 __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3288 __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3289 __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3290 __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
ee0ce478
VS
3291}
3292
34c998b4
CW
3293static void gen6_gmch_remove(struct i915_address_space *vm)
3294{
3295 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3296
3297 iounmap(ggtt->gsm);
8448661d 3298 cleanup_scratch_page(vm);
34c998b4
CW
3299}
3300
36e16c49
ZW
3301static void setup_private_pat(struct drm_i915_private *dev_priv)
3302{
4395890a
ZW
3303 struct intel_ppat *ppat = &dev_priv->ppat;
3304 int i;
3305
3306 ppat->i915 = dev_priv;
3307
36e16c49 3308 if (INTEL_GEN(dev_priv) >= 10)
4395890a 3309 cnl_setup_private_ppat(ppat);
36e16c49 3310 else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
4395890a 3311 chv_setup_private_ppat(ppat);
36e16c49 3312 else
4395890a
ZW
3313 bdw_setup_private_ppat(ppat);
3314
3315 GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3316
3317 for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3318 ppat->entries[i].value = ppat->clear_value;
3319 ppat->entries[i].ppat = ppat;
3320 set_bit(i, ppat->dirty);
3321 }
3322
3323 ppat->update_hw(dev_priv);
36e16c49
ZW
3324}
3325
d507d735 3326static int gen8_gmch_probe(struct i915_ggtt *ggtt)
63340133 3327{
49d73912 3328 struct drm_i915_private *dev_priv = ggtt->base.i915;
97d6d7ab 3329 struct pci_dev *pdev = dev_priv->drm.pdev;
34c998b4 3330 unsigned int size;
63340133 3331 u16 snb_gmch_ctl;
4519290a 3332 int err;
63340133
BW
3333
3334 /* TODO: We're not aware of mappable constraints on gen8 yet */
97d6d7ab
CW
3335 ggtt->mappable_base = pci_resource_start(pdev, 2);
3336 ggtt->mappable_end = pci_resource_len(pdev, 2);
63340133 3337
4519290a
ID
3338 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3339 if (!err)
3340 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3341 if (err)
3342 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
63340133 3343
97d6d7ab 3344 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
63340133 3345
97d6d7ab 3346 if (INTEL_GEN(dev_priv) >= 9) {
d507d735 3347 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
34c998b4 3348 size = gen8_get_total_gtt_size(snb_gmch_ctl);
97d6d7ab 3349 } else if (IS_CHERRYVIEW(dev_priv)) {
d507d735 3350 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
34c998b4 3351 size = chv_get_total_gtt_size(snb_gmch_ctl);
d7f25f23 3352 } else {
d507d735 3353 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
34c998b4 3354 size = gen8_get_total_gtt_size(snb_gmch_ctl);
d7f25f23 3355 }
63340133 3356
34c998b4 3357 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
34c998b4 3358 ggtt->base.cleanup = gen6_gmch_remove;
d507d735
JL
3359 ggtt->base.bind_vma = ggtt_bind_vma;
3360 ggtt->base.unbind_vma = ggtt_unbind_vma;
fa3f46af
MA
3361 ggtt->base.set_pages = ggtt_set_pages;
3362 ggtt->base.clear_pages = clear_pages;
d6473f56 3363 ggtt->base.insert_page = gen8_ggtt_insert_page;
f7770bfd 3364 ggtt->base.clear_range = nop_clear_range;
48f112fe 3365 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
f7770bfd
CW
3366 ggtt->base.clear_range = gen8_ggtt_clear_range;
3367
3368 ggtt->base.insert_entries = gen8_ggtt_insert_entries;
f7770bfd 3369
0ef34ad6
JB
3370 /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3371 if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
3372 ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3373 ggtt->base.insert_page = bxt_vtd_ggtt_insert_page__BKL;
3374 if (ggtt->base.clear_range != nop_clear_range)
3375 ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3376 }
3377
7c3f86b6
CW
3378 ggtt->invalidate = gen6_ggtt_invalidate;
3379
36e16c49
ZW
3380 setup_private_pat(dev_priv);
3381
34c998b4 3382 return ggtt_probe_common(ggtt, size);
63340133
BW
3383}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *dev_priv = ggtt->base.i915;
	struct pci_dev *pdev = dev_priv->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;
	int err;

	ggtt->mappable_base = pci_resource_start(pdev, 2);
	ggtt->mappable_end = pci_resource_len(pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
		DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
		return -ENXIO;
	}

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
	if (!err)
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
	if (err)
		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;

	ggtt->base.clear_range = gen6_ggtt_clear_range;
	ggtt->base.insert_page = gen6_ggtt_insert_page;
	ggtt->base.insert_entries = gen6_ggtt_insert_entries;
	ggtt->base.bind_vma = ggtt_bind_vma;
	ggtt->base.unbind_vma = ggtt_unbind_vma;
	ggtt->base.set_pages = ggtt_set_pages;
	ggtt->base.clear_pages = clear_pages;
	ggtt->base.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(dev_priv))
		ggtt->base.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(dev_priv))
		ggtt->base.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(dev_priv))
		ggtt->base.pte_encode = byt_pte_encode;
	else if (INTEL_GEN(dev_priv) >= 7)
		ggtt->base.pte_encode = ivb_pte_encode;
	else
		ggtt->base.pte_encode = snb_pte_encode;

	return ggtt_probe_common(ggtt, size);
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

static int i915_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *dev_priv = ggtt->base.i915;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(&ggtt->base.total,
		      &ggtt->stolen_size,
		      &ggtt->mappable_base,
		      &ggtt->mappable_end);

	ggtt->do_idle_maps = needs_idle_maps(dev_priv);
	ggtt->base.insert_page = i915_ggtt_insert_page;
	ggtt->base.insert_entries = i915_ggtt_insert_entries;
	ggtt->base.clear_range = i915_ggtt_clear_range;
	ggtt->base.bind_vma = ggtt_bind_vma;
	ggtt->base.unbind_vma = ggtt_unbind_vma;
	ggtt->base.set_pages = ggtt_set_pages;
	ggtt->base.clear_pages = clear_pages;
	ggtt->base.cleanup = i915_gmch_remove;

	ggtt->invalidate = gmch_ggtt_invalidate;

	if (unlikely(ggtt->do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @dev_priv: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	int ret;

	ggtt->base.i915 = dev_priv;
	ggtt->base.dma = &dev_priv->drm.pdev->dev;

	if (INTEL_GEN(dev_priv) <= 5)
		ret = i915_gmch_probe(ggtt);
	else if (INTEL_GEN(dev_priv) < 8)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = gen8_gmch_probe(ggtt);
	if (ret)
		return ret;

	/* Trim the GGTT to fit the GuC mappable upper range (when enabled).
	 * This is easier than doing range restriction on the fly, as we
	 * currently don't have any bits spare to pass in this upper
	 * restriction!
	 */
	if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading) {
		ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP);
		ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
	}

	if ((ggtt->base.total - 1) >> 32) {
		DRM_ERROR("We never expected a Global GTT with more than 32bits"
			  " of address space! Found %lldM!\n",
			  ggtt->base.total >> 20);
		ggtt->base.total = 1ULL << 32;
		ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
	}

	if (ggtt->mappable_end > ggtt->base.total) {
		DRM_ERROR("mappable aperture extends past end of GGTT,"
			  " aperture=%llx, total=%llx\n",
			  ggtt->mappable_end, ggtt->base.total);
		ggtt->mappable_end = ggtt->base.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %lluM\n",
		 ggtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20);
	if (intel_vtd_active())
		DRM_INFO("VT-d active for gfx access\n");

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @dev_priv: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	int ret;

	INIT_LIST_HEAD(&dev_priv->vm_list);

	/* Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	i915_address_space_init(&ggtt->base, dev_priv, "[global]");
	if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
		ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
	mutex_unlock(&dev_priv->drm.struct_mutex);

	if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
				dev_priv->ggtt.mappable_base,
				dev_priv->ggtt.mappable_end)) {
		ret = -EIO;
		goto out_gtt_cleanup;
	}

	ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end);

	/*
	 * Initialise stolen early so that we may reserve preallocated
	 * objects for the BIOS to KMS transition.
	 */
	ret = i915_gem_init_stolen(dev_priv);
	if (ret)
		goto out_gtt_cleanup;

	return 0;

out_gtt_cleanup:
	ggtt->base.cleanup(&ggtt->base);
	return ret;
}

int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
		return -EIO;

	return 0;
}
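
/*
 * Illustrative sketch, not a copy of the real driver load path: the three
 * entry points above are intended to run in probe -> init -> enable order,
 * with i915_ggtt_probe_hw() sizing the GGTT before i915_ggtt_init_hw()
 * creates the address space and io mapping. Error unwinding is elided here.
 *
 *	err = i915_ggtt_probe_hw(dev_priv);
 *	if (err)
 *		return err;
 *
 *	err = i915_ggtt_init_hw(dev_priv);
 *	if (err)
 *		return err;
 *
 *	err = i915_ggtt_enable_hw(dev_priv);
 *	if (err)
 *		return err;
 */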

void i915_ggtt_enable_guc(struct drm_i915_private *i915)
{
	GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);

	i915->ggtt.invalidate = guc_ggtt_invalidate;
}

void i915_ggtt_disable_guc(struct drm_i915_private *i915)
{
	/* We should only be called after i915_ggtt_enable_guc() */
	GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);

	i915->ggtt.invalidate = gen6_ggtt_invalidate;
}
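
/*
 * The two helpers above only swap the GGTT invalidate vfunc; the
 * GEM_BUG_ONs force them to be used in strict pairs. A hypothetical
 * caller bracketing GuC submission setup and teardown would look like:
 *
 *	i915_ggtt_enable_guc(i915);	invalidation now goes via the GuC
 *	... enable GuC submission ...
 *	i915_ggtt_disable_guc(i915);	restore gen6_ggtt_invalidate on teardown
 */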

void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_object *obj, *on;

	i915_check_and_clear_faults(dev_priv);

	/* First fill our portion of the GTT with scratch pages */
	ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);

	ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) {
		bool ggtt_bound = false;
		struct i915_vma *vma;

		list_for_each_entry(vma, &obj->vma_list, obj_link) {
			if (vma->vm != &ggtt->base)
				continue;

			if (!i915_vma_unbind(vma))
				continue;

			WARN_ON(i915_vma_bind(vma, obj->cache_level,
					      PIN_UPDATE));
			ggtt_bound = true;
		}

		if (ggtt_bound)
			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
	}

	ggtt->base.closed = false;

	if (INTEL_GEN(dev_priv) >= 8) {
		struct intel_ppat *ppat = &dev_priv->ppat;

		bitmap_set(ppat->dirty, 0, ppat->max_entries);
		dev_priv->ppat.update_hw(dev_priv);
		return;
	}

	if (USES_PPGTT(dev_priv)) {
		struct i915_address_space *vm;

		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
			struct i915_hw_ppgtt *ppgtt;

			if (i915_is_ggtt(vm))
				ppgtt = dev_priv->mm.aliasing_ppgtt;
			else
				ppgtt = i915_vm_to_ppgtt(vm);

			gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
		}
	}

	i915_ggtt_invalidate(dev_priv);
}

static struct scatterlist *
rotate_pages(const dma_addr_t *in, unsigned int offset,
	     unsigned int width, unsigned int height,
	     unsigned int stride,
	     struct sg_table *st, struct scatterlist *sg)
{
	unsigned int column, row;
	unsigned int src_idx;

	for (column = 0; column < width; column++) {
		src_idx = stride * (height - 1) + column;
		for (row = 0; row < height; row++) {
			st->nents++;
			/* We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * All we need are the DMA addresses.
			 */
			sg_set_page(sg, NULL, PAGE_SIZE, 0);
			sg_dma_address(sg) = in[offset + src_idx];
			sg_dma_len(sg) = PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= stride;
		}
	}

	return sg;
}
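
/*
 * Worked example of the walk in rotate_pages() above (illustrative values):
 * for a single plane with width = 2, height = 2, stride = 2 and offset = 0,
 * the source pages are laid out row-major as
 *
 *	in[0] in[1]
 *	in[2] in[3]
 *
 * and the loop emits their DMA addresses in the order
 *
 *	in[2], in[0], in[3], in[1]
 *
 * i.e. each column is walked from the bottom row upwards, producing the
 * rotated layout consumed by the display engine.
 */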

static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
		   struct drm_i915_gem_object *obj)
{
	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
	unsigned int size = intel_rotation_info_size(rot_info);
	struct sgt_iter sgt_iter;
	dma_addr_t dma_addr;
	unsigned long i;
	dma_addr_t *page_addr_list;
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;

	/* Allocate a temporary list of source pages for random access. */
	page_addr_list = kvmalloc_array(n_pages,
					sizeof(dma_addr_t),
					GFP_KERNEL);
	if (!page_addr_list)
		return ERR_PTR(ret);

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	/* Populate source page list from the object. */
	i = 0;
	for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
		page_addr_list[i++] = dma_addr;

	GEM_BUG_ON(i != n_pages);
	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
		sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
				  rot_info->plane[i].width, rot_info->plane[i].height,
				  rot_info->plane[i].stride, st, sg);
	}

	DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);

	kvfree(page_addr_list);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:
	kvfree(page_addr_list);

	DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);

	return ERR_PTR(ret);
}

static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
	struct sg_table *st;
	struct scatterlist *sg, *iter;
	unsigned int count = view->partial.size;
	unsigned int offset;
	int ret = -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, count, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
	GEM_BUG_ON(!iter);

	sg = st->sgl;
	st->nents = 0;
	do {
		unsigned int len;

		len = min(iter->length - (offset << PAGE_SHIFT),
			  count << PAGE_SHIFT);
		sg_set_page(sg, NULL, len, 0);
		sg_dma_address(sg) =
			sg_dma_address(iter) + (offset << PAGE_SHIFT);
		sg_dma_len(sg) = len;

		st->nents++;
		count -= len >> PAGE_SHIFT;
		if (count == 0) {
			sg_mark_end(sg);
			return st;
		}

		sg = __sg_next(sg);
		iter = __sg_next(iter);
		offset = 0;
	} while (1);

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
}
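
/*
 * Illustrative example (hypothetical values): a partial view with
 * view->partial.offset = 2 and view->partial.size = 3 yields an sg_table
 * covering only pages [2, 5) of the object. Because the loop above takes
 * as much as possible from each source scatterlist entry, contiguous
 * source pages are coalesced and the result may need fewer than three
 * entries.
 */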

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret;

	/* The vma->pages are only valid within the lifespan of the borrowed
	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, the
	 * vma->pages must be regenerated as well. A simple rule is that
	 * vma->pages must only be accessed while the obj->mm.pages are pinned.
	 */
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

	switch (vma->ggtt_view.type) {
	case I915_GGTT_VIEW_NORMAL:
		vma->pages = vma->obj->mm.pages;
		return 0;

	case I915_GGTT_VIEW_ROTATED:
		vma->pages =
			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
		break;

	case I915_GGTT_VIEW_PARTIAL:
		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
		break;

	default:
		WARN_ONCE(1, "GGTT view %u not implemented!\n",
			  vma->ggtt_view.type);
		return -EINVAL;
	}

	ret = 0;
	if (unlikely(IS_ERR(vma->pages))) {
		ret = PTR_ERR(vma->pages);
		vma->pages = NULL;
		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
			  vma->ggtt_view.type, ret);
	}
	return ret;
}

/**
 * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
 * @vm: the &struct i915_address_space
 * @node: the &struct drm_mm_node (typically i915_vma.node)
 * @size: how much space to allocate inside the GTT,
 *        must be #I915_GTT_PAGE_SIZE aligned
 * @offset: where to insert inside the GTT,
 *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
 *          (@offset + @size) must fit within the address space
 * @color: color to apply to node, if this node is not from a VMA,
 *         color must be #I915_COLOR_UNEVICTABLE
 * @flags: control search and eviction behaviour
 *
 * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
 * the address space (using @size and @color). If the @node does not fit, it
 * tries to evict any overlapping nodes from the GTT, including any
 * neighbouring nodes if the colors do not match (to ensure guard pages between
 * differing domains). See i915_gem_evict_for_node() for the gory details
 * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
 * evicting active overlapping objects, and any overlapping node that is pinned
 * or marked as unevictable will also result in failure.
 *
 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
 * asked to wait for eviction and interrupted.
 */
int i915_gem_gtt_reserve(struct i915_address_space *vm,
			 struct drm_mm_node *node,
			 u64 size, u64 offset, unsigned long color,
			 unsigned int flags)
{
	int err;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
	GEM_BUG_ON(range_overflows(offset, size, vm->total));
	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
	GEM_BUG_ON(drm_mm_node_allocated(node));

	node->size = size;
	node->start = offset;
	node->color = color;

	err = drm_mm_reserve_node(&vm->mm, node);
	if (err != -ENOSPC)
		return err;

	if (flags & PIN_NOEVICT)
		return -ENOSPC;

	err = i915_gem_evict_for_node(vm, node, flags);
	if (err == 0)
		err = drm_mm_reserve_node(&vm->mm, node);

	return err;
}
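
/*
 * Minimal usage sketch, not taken from a real caller; the wrapper name and
 * the size/offset values are hypothetical. This pins a node at a fixed
 * GGTT offset, allowing eviction of inactive overlapping nodes only:
 *
 *	static int example_reserve(struct i915_address_space *vm,
 *				   struct drm_mm_node *node)
 *	{
 *		return i915_gem_gtt_reserve(vm, node,
 *					    SZ_64K, SZ_2M,
 *					    I915_COLOR_UNEVICTABLE,
 *					    PIN_NONBLOCK);
 *	}
 *
 * On success the node occupies [SZ_2M, SZ_2M + SZ_64K) and is released
 * again with drm_mm_remove_node().
 */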

static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
{
	u64 range, addr;

	GEM_BUG_ON(range_overflows(start, len, end));
	GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));

	range = round_down(end - len, align) - round_up(start, align);
	if (range) {
		if (sizeof(unsigned long) == sizeof(u64)) {
			addr = get_random_long();
		} else {
			addr = get_random_int();
			if (range > U32_MAX) {
				addr <<= 32;
				addr |= get_random_int();
			}
		}
		div64_u64_rem(addr, range, &addr);
		start += addr;
	}

	return round_up(start, align);
}
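
/*
 * Worked example (illustrative numbers): random_offset(0, SZ_1M, SZ_64K,
 * SZ_4K) computes
 *
 *	range = round_down(1M - 64K, 4K) - round_up(0, 4K) = 960K
 *
 * then picks a random value in [0, 960K), adds it to @start and rounds the
 * sum up to the 4K alignment, so the returned offset is always 4K aligned
 * and [offset, offset + 64K) never extends past @end.
 */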

/**
 * i915_gem_gtt_insert - insert a node into an address_space (GTT)
 * @vm: the &struct i915_address_space
 * @node: the &struct drm_mm_node (typically i915_vma.node)
 * @size: how much space to allocate inside the GTT,
 *        must be #I915_GTT_PAGE_SIZE aligned
 * @alignment: required alignment of starting offset, may be 0 but
 *             if specified, this must be a power-of-two and at least
 *             #I915_GTT_MIN_ALIGNMENT
 * @color: color to apply to node
 * @start: start of any range restriction inside GTT (0 for all),
 *         must be #I915_GTT_PAGE_SIZE aligned
 * @end: end of any range restriction inside GTT (U64_MAX for all),
 *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
 * @flags: control search and eviction behaviour
 *
 * i915_gem_gtt_insert() first searches for an available hole into which
 * it can insert the node. The hole address is aligned to @alignment and
 * its @size must then fit entirely within the [@start, @end] bounds. The
 * nodes on either side of the hole must match @color, or else a guard page
 * will be inserted between the two nodes (or the node evicted). If no
 * suitable hole is found, first a victim is randomly selected and tested
 * for eviction, and failing that the LRU list of objects within the GTT
 * is scanned to find the first set of replacement nodes to create the hole.
 * Those old overlapping nodes are evicted from the GTT (and so must be
 * rebound before any future use). Any node that is currently pinned cannot
 * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
 * active and #PIN_NONBLOCK is specified, that node is also skipped when
 * searching for an eviction candidate. See i915_gem_evict_something() for
 * the gory details on the eviction algorithm.
 *
 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
 * asked to wait for eviction and interrupted.
 */
int i915_gem_gtt_insert(struct i915_address_space *vm,
			struct drm_mm_node *node,
			u64 size, u64 alignment, unsigned long color,
			u64 start, u64 end, unsigned int flags)
{
	enum drm_mm_insert_mode mode;
	u64 offset;
	int err;

	lockdep_assert_held(&vm->i915->drm.struct_mutex);
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
	GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
	GEM_BUG_ON(start >= end);
	GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
	GEM_BUG_ON(drm_mm_node_allocated(node));

	if (unlikely(range_overflows(start, size, end)))
		return -ENOSPC;

	if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
		return -ENOSPC;

	mode = DRM_MM_INSERT_BEST;
	if (flags & PIN_HIGH)
		mode = DRM_MM_INSERT_HIGH;
	if (flags & PIN_MAPPABLE)
		mode = DRM_MM_INSERT_LOW;

	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
	 * so we know that we always have a minimum alignment of 4096.
	 * The drm_mm range manager is optimised to return results
	 * with zero alignment, so where possible use the optimal
	 * path.
	 */
	BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
	if (alignment <= I915_GTT_MIN_ALIGNMENT)
		alignment = 0;

	err = drm_mm_insert_node_in_range(&vm->mm, node,
					  size, alignment, color,
					  start, end, mode);
	if (err != -ENOSPC)
		return err;

	if (flags & PIN_NOEVICT)
		return -ENOSPC;

	/* No free space, pick a slot at random.
	 *
	 * There is a pathological case here using a GTT shared between
	 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
	 *
	 * |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
	 *         (64k objects)            (448k objects)
	 *
	 * Now imagine that the eviction LRU is ordered top-down (just because
	 * pathology meets real life), and that we need to evict an object to
	 * make room inside the aperture. The eviction scan then has to walk
	 * the 448k list before it finds one within range. And now imagine that
	 * it has to search for a new hole between every byte inside the memcpy,
	 * for several simultaneous clients.
	 *
	 * On a full-ppgtt system, if we have run out of available space, there
	 * will be lots and lots of objects in the eviction list! Again,
	 * searching that LRU list may be slow if we are also applying any
	 * range restrictions (e.g. restriction to low 4GiB) and so, for
	 * simplicity and similarity between different GTT, try the single
	 * random replacement first.
	 */
	offset = random_offset(start, end,
			       size, alignment ?: I915_GTT_MIN_ALIGNMENT);
	err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
	if (err != -ENOSPC)
		return err;

	/* Randomly selected placement is pinned, do a search */
	err = i915_gem_evict_something(vm, size, alignment, color,
				       start, end, flags);
	if (err)
		return err;

	return drm_mm_insert_node_in_range(&vm->mm, node,
					   size, alignment, color,
					   start, end, DRM_MM_INSERT_EVICT);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#include "selftests/i915_gem_gtt.c"
#endif