// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/sched/mm.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"
static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
	return IS_BROXTON(i915) && i915_vtd_active(i915);
}

bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
{
	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
}
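
/*
 * Page-table backing store allocators: alloc_pt_lmem() places the paging
 * structure in device local memory, while alloc_pt_dma() uses an internal
 * (system memory) object. Both share the vm's dma-resv so that a single
 * object_lock() covers every paging structure of the address space.
 */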
struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}
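
/*
 * map_pt_dma() and map_pt_dma_locked() CPU-map a paging-structure object
 * with a platform-appropriate coherent mapping type and mark it
 * unshrinkable so the shrinker never reclaims live page tables. The
 * _locked variant expects the caller to already hold the object lock.
 */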
int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}
int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}
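
/*
 * clear_vm_list() drains one of the vm's vma lists during teardown:
 * vmas whose object is still alive are destroyed immediately, while vmas
 * of dying objects are merely unbound and left for the object destructor,
 * as explained in the comments below.
 */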
static void clear_vm_list(struct list_head *list)
{
	struct i915_vma *vma, *vn;

	list_for_each_entry_safe(vma, vn, list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_gem_object_get_rcu(obj)) {
			/*
			 * Object is dying, but has not yet cleared its
			 * vma list.
			 * Unbind the dying vma to ensure our list
			 * is completely drained. We leave the destruction to
			 * the object destructor to avoid the vma
			 * disappearing under it.
			 */
			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
			WARN_ON(__i915_vma_unbind(vma));

			/* Remove from the unbound list */
			list_del_init(&vma->vm_link);

			/*
			 * Delay the vm and vm mutex freeing until the
			 * object is done with destruction.
			 */
			i915_vm_resv_get(vma->vm);
			vma->vm_ddestroy = true;
		} else {
			i915_vma_destroy_locked(vma);
			i915_gem_object_put(obj);
		}
	}
}
static void __i915_vm_close(struct i915_address_space *vm)
{
	mutex_lock(&vm->mutex);

	clear_vm_list(&vm->bound_list);
	clear_vm_list(&vm->unbound_list);

	/* Check for must-fix unanticipated side-effects */
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	GEM_BUG_ON(!list_empty(&vm->unbound_list));

	mutex_unlock(&vm->mutex);
}
/* Lock the vm into the current ww context; if we lock one, we lock all. */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}
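
/*
 * Illustrative only: callers typically take the vm lock inside a ww
 * transaction, retrying on -EDEADLK via the for_i915_gem_ww() helper,
 * along the lines of this (hypothetical) sketch:
 *
 *	struct i915_gem_ww_ctx ww;
 *	int err;
 *
 *	for_i915_gem_ww(&ww, err, true) {
 *		err = i915_vm_lock_objects(vm, &ww);
 *		if (err)
 *			continue;
 *		... operate on the vm's paging structures ...
 *	}
 */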
void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
}
/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock, and also if we raced when
 * destroying a vma by the vma destruction code.
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	mutex_destroy(&vm->mutex);

	kfree(vm);
}
static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	__i915_vm_close(vm);

	/* Synchronize async unbinds. */
	i915_vma_resource_bind_dep_sync_all(vm);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}
void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}
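
/*
 * i915_address_space_init() sets up the parts common to the GGTT and
 * ppGTTs: reference counts, the reclaim-safe vm->mutex, the shared
 * dma-resv, the drm_mm range manager and the per-region minimum
 * alignments.
 */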
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	vm->pending_unbind = RB_ROOT_CACHED;
	INIT_WORK(&vm->release_work, __i915_vm_release);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV + BXT VTD workaround uses stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the annotation for this, we use trylock in the shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);

	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
		 ARRAY_SIZE(vm->min_alignment));

	if (HAS_64K_PAGES(vm->i915)) {
		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
	}

	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
	INIT_LIST_HEAD(&vm->unbound_list);
}
void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);
}
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}
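
/*
 * setup_scratch_page() first tries a 64K scratch allocation on platforms
 * that benefit from it and falls back to a single 4K page if the larger
 * allocation fails or is not suitably contiguous and aligned.
 */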
int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
	    !HAS_64K_PAGES(vm->i915))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}
void free_scratch(struct i915_address_space *vm)
{
	int i;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}
void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. It is called on driver
	 * load and after a GPU reset, so you can place workarounds here even
	 * if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}
static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}
static void xehp_setup_private_ppat(struct intel_gt *gt)
{
	enum forcewake_domains fw;
	unsigned long flags;

	fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg),
					    FW_REG_WRITE);
	intel_uncore_forcewake_get(gt->uncore, fw);

	intel_gt_mcr_lock(gt, &flags);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
	intel_gt_mcr_unlock(gt, flags);

	intel_uncore_forcewake_put(gt->uncore, fw);
}
static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}
/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
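
/*
 * setup_private_pat() programs the private PAT for the running platform,
 * dispatching to the Xe_HP, TGL, ICL, CHV/BXT or BDW variant above.
 */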
void setup_private_pat(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	struct drm_i915_private *i915 = gt->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		xehp_setup_private_ppat(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}
struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}
struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}
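
/*
 * Illustrative only: a (hypothetical) selftest-style user of the pinned
 * scratch helper could look roughly like the sketch below, unpinning and
 * releasing the vma once the CPU has read back the results:
 *
 *	struct i915_vma *vma;
 *
 *	vma = __vm_create_scratch_for_read_pinned(vm, SZ_4K);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 *	... have the GPU write into the vma, then read it back ...
 *
 *	i915_vma_unpin(vma);
 *	i915_vma_put(vma);
 */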
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif