Merge branch 'drm-next-4.18' of git://people.freedesktop.org/~agd5f/linux into drm...
author Dave Airlie <airlied@redhat.com>
Tue, 15 May 2018 22:21:51 +0000 (08:21 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 15 May 2018 22:31:29 +0000 (08:31 +1000)
Main changes for 4.18.  I'd like to do a separate pull for vega20 later
this week or next.  Highlights:
- Reserve pre-OS scanout buffer during init for seamless transition from
  console to driver
- VEGAM support
- Improved GPU scheduler documentation
- Initial gfxoff support for raven
- SR-IOV fixes
- Default to non-AGP on PowerPC for radeon
- Fine grained clock voltage control for vega10
- Power profiles for vega10
- Further clean up of powerplay/driver interface
- Underlay fixes
- Display link bw updates
- Gamma fixes
- Scatter/Gather display support on CZ/ST
- Misc bug fixes and clean ups

[airlied: fixup v3d vs scheduler API change]

Link: https://patchwork.freedesktop.org/patch/msgid/20180515185450.1113-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
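
A recurring change across the amdgpu hunks below (alloc_gtt_mem, amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, amdgpu_ttm_fw_reserve_vram_init) is the switch of amdgpu_bo_create() from a long positional argument list to a struct amdgpu_bo_param. A minimal sketch of the new calling convention, built only from the fields visible in those hunks (the helper name is illustrative, not part of the merge):

    /* Illustrative only: allocate a kernel BO in GTT using the new
     * amdgpu_bo_param convention shown in the hunks below.
     */
    static int example_alloc_gtt_bo(struct amdgpu_device *adev, unsigned long size,
                                    struct amdgpu_bo **bo)
    {
            struct amdgpu_bo_param bp;

            memset(&bp, 0, sizeof(bp));
            bp.size = size;                            /* allocation size in bytes */
            bp.byte_align = PAGE_SIZE;                 /* CPU page alignment */
            bp.domain = AMDGPU_GEM_DOMAIN_GTT;         /* place the BO in GTT */
            bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; /* write-combined CPU mapping */
            bp.type = ttm_bo_type_kernel;
            bp.resv = NULL;                            /* no shared reservation object */

            return amdgpu_bo_create(adev, &bp, bo);    /* new three-argument form */
    }

Callers that previously passed size, alignment, domain, flags, type and resv positionally now fill the parameter struct once and pass it by pointer, which is why each converted hunk gains the memset()/field-assignment block.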
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/soc15d.h
drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/v3d/v3d_drv.c

index f3002020df6ccc380b548aa2d9a920cf2a9c710d,2fe4a0bf98c845e08e884f3d1c831fa3591f5f16..68e9f584c570df45eb7973bcd77f64b35b0336c6
@@@ -64,6 -64,10 +64,10 @@@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o g
  amdgpu-y += \
        vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
  
+ # add DF block
+ amdgpu-y += \
+       df_v1_7.o
  # add GMC block
  amdgpu-y += \
        gmc_v7_0.o \
@@@ -130,8 -134,7 +134,8 @@@ amdgpu-y += 
         amdgpu_amdkfd.o \
         amdgpu_amdkfd_fence.o \
         amdgpu_amdkfd_gpuvm.o \
 -       amdgpu_amdkfd_gfx_v8.o
 +       amdgpu_amdkfd_gfx_v8.o \
 +       amdgpu_amdkfd_gfx_v9.o
  
  # add cgs
  amdgpu-y += amdgpu_cgs.o
index cd0e8f192e6a52f20f8a11d7aea656c736ce8029,887702c59488434cab0e1e36b6393ba4dc146560..bd36ee9f7e6d7ed012dd63b6973fdbb112b70998
@@@ -92,10 -92,6 +92,10 @@@ void amdgpu_amdkfd_device_probe(struct 
        case CHIP_POLARIS11:
                kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
                break;
 +      case CHIP_VEGA10:
 +      case CHIP_RAVEN:
 +              kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
 +              break;
        default:
                dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
                return;
@@@ -179,28 -175,6 +179,28 @@@ void amdgpu_amdkfd_device_init(struct a
                                &gpu_resources.doorbell_physical_address,
                                &gpu_resources.doorbell_aperture_size,
                                &gpu_resources.doorbell_start_offset);
 +              if (adev->asic_type >= CHIP_VEGA10) {
 +                      /* On SOC15 the BIF is involved in routing
 +                       * doorbells using the low 12 bits of the
 +                       * address. Communicate the assignments to
 +                       * KFD. KFD uses two doorbell pages per
 +                       * process in case of 64-bit doorbells so we
 +                       * can use each doorbell assignment twice.
 +                       */
 +                      gpu_resources.sdma_doorbell[0][0] =
 +                              AMDGPU_DOORBELL64_sDMA_ENGINE0;
 +                      gpu_resources.sdma_doorbell[0][1] =
 +                              AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
 +                      gpu_resources.sdma_doorbell[1][0] =
 +                              AMDGPU_DOORBELL64_sDMA_ENGINE1;
 +                      gpu_resources.sdma_doorbell[1][1] =
 +                              AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
 +                      /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
 +                       * SDMA, IH and VCN. So don't use them for the CP.
 +                       */
 +                      gpu_resources.reserved_doorbell_mask = 0x1f0;
 +                      gpu_resources.reserved_doorbell_val  = 0x0f0;
 +              }
  
                kgd2kfd->device_init(adev->kfd, &gpu_resources);
        }
@@@ -243,13 -217,19 +243,19 @@@ int alloc_gtt_mem(struct kgd_dev *kgd, 
  {
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
        struct amdgpu_bo *bo = NULL;
+       struct amdgpu_bo_param bp;
        int r;
        uint64_t gpu_addr_tmp = 0;
        void *cpu_ptr_tmp = NULL;
  
-       r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
-                            AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
-                            NULL, &bo);
+       memset(&bp, 0, sizeof(bp));
+       bp.size = size;
+       bp.byte_align = PAGE_SIZE;
+       bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+       bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+       bp.type = ttm_bo_type_kernel;
+       bp.resv = NULL;
+       r = amdgpu_bo_create(adev, &bp, &bo);
        if (r) {
                dev_err(adev->dev,
                        "failed to allocate BO for amdkfd (%d)\n", r);
index 5296e24fd6620567d995a4b4d6d7651ce0e5907f,c1b0cdb401dcc8ed12b5d8af5512045dd37bcae8..72ab2b1ffe7510ef26b034a60addf51cd123f6fd
@@@ -23,7 -23,6 +23,7 @@@
  #define pr_fmt(fmt) "kfd2kgd: " fmt
  
  #include <linux/list.h>
 +#include <linux/sched/mm.h>
  #include <drm/drmP.h>
  #include "amdgpu_object.h"
  #include "amdgpu_vm.h"
   */
  #define VI_BO_SIZE_ALIGN (0x8000)
  
 +/* BO flag to indicate a KFD userptr BO */
 +#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
 +
 +/* Userptr restore delay, just long enough to allow consecutive VM
 + * changes to accumulate
 + */
 +#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
 +
  /* Impose limit on how much memory KFD can use */
  static struct {
        uint64_t max_system_mem_limit;
 +      uint64_t max_userptr_mem_limit;
        int64_t system_mem_used;
 +      int64_t userptr_mem_used;
        spinlock_t mem_limit_lock;
  } kfd_mem_limit;
  
@@@ -68,7 -57,6 +68,7 @@@ static const char * const domain_bit_to
  
  #define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
  
 +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
  
  
  static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@@ -90,7 -78,6 +90,7 @@@ static bool check_if_add_bo_to_vm(struc
  
   /* Set memory usage limits. Currently, limits are
   *  System (kernel) memory - 3/8th System RAM
 + *  Userptr memory - 3/4th System RAM
   */
  void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
  {
  
        spin_lock_init(&kfd_mem_limit.mem_limit_lock);
        kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
 -      pr_debug("Kernel memory limit %lluM\n",
 -              (kfd_mem_limit.max_system_mem_limit >> 20));
 +      kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
 +      pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
 +              (kfd_mem_limit.max_system_mem_limit >> 20),
 +              (kfd_mem_limit.max_userptr_mem_limit >> 20));
  }
  
  static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
                        goto err_no_mem;
                }
                kfd_mem_limit.system_mem_used += (acc_size + size);
 +      } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
 +              if ((kfd_mem_limit.system_mem_used + acc_size >
 +                      kfd_mem_limit.max_system_mem_limit) ||
 +                      (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
 +                      kfd_mem_limit.max_userptr_mem_limit)) {
 +                      ret = -ENOMEM;
 +                      goto err_no_mem;
 +              }
 +              kfd_mem_limit.system_mem_used += acc_size;
 +              kfd_mem_limit.userptr_mem_used += size;
        }
  err_no_mem:
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@@ -151,16 -126,10 +151,16 @@@ static void unreserve_system_mem_limit(
                                       sizeof(struct amdgpu_bo));
  
        spin_lock(&kfd_mem_limit.mem_limit_lock);
 -      if (domain == AMDGPU_GEM_DOMAIN_GTT)
 +      if (domain == AMDGPU_GEM_DOMAIN_GTT) {
                kfd_mem_limit.system_mem_used -= (acc_size + size);
 +      } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
 +              kfd_mem_limit.system_mem_used -= acc_size;
 +              kfd_mem_limit.userptr_mem_used -= size;
 +      }
        WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
                  "kfd system memory accounting unbalanced");
 +      WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
 +                "kfd userptr memory accounting unbalanced");
  
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
  }
@@@ -169,17 -138,12 +169,17 @@@ void amdgpu_amdkfd_unreserve_system_mem
  {
        spin_lock(&kfd_mem_limit.mem_limit_lock);
  
 -      if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
 +      if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
 +              kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
 +              kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
 +      } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
                kfd_mem_limit.system_mem_used -=
                        (bo->tbo.acc_size + amdgpu_bo_size(bo));
        }
        WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
                  "kfd system memory accounting unbalanced");
 +      WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
 +                "kfd userptr memory accounting unbalanced");
  
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
  }
@@@ -542,8 -506,7 +542,8 @@@ static void remove_bo_from_vm(struct am
  }
  
  static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
 -                              struct amdkfd_process_info *process_info)
 +                              struct amdkfd_process_info *process_info,
 +                              bool userptr)
  {
        struct ttm_validate_buffer *entry = &mem->validate_list;
        struct amdgpu_bo *bo = mem->bo;
        entry->shared = true;
        entry->bo = &bo->tbo;
        mutex_lock(&process_info->lock);
 -      list_add_tail(&entry->head, &process_info->kfd_bo_list);
 +      if (userptr)
 +              list_add_tail(&entry->head, &process_info->userptr_valid_list);
 +      else
 +              list_add_tail(&entry->head, &process_info->kfd_bo_list);
        mutex_unlock(&process_info->lock);
  }
  
 +/* Initializes user pages. It registers the MMU notifier and validates
 + * the userptr BO in the GTT domain.
 + *
 + * The BO must already be on the userptr_valid_list. Otherwise an
 + * eviction and restore may happen that leaves the new BO unmapped
 + * with the user mode queues running.
 + *
 + * Takes the process_info->lock to protect against concurrent restore
 + * workers.
 + *
 + * Returns 0 for success, negative errno for errors.
 + */
 +static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 +                         uint64_t user_addr)
 +{
 +      struct amdkfd_process_info *process_info = mem->process_info;
 +      struct amdgpu_bo *bo = mem->bo;
 +      struct ttm_operation_ctx ctx = { true, false };
 +      int ret = 0;
 +
 +      mutex_lock(&process_info->lock);
 +
 +      ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
 +      if (ret) {
 +              pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
 +              goto out;
 +      }
 +
 +      ret = amdgpu_mn_register(bo, user_addr);
 +      if (ret) {
 +              pr_err("%s: Failed to register MMU notifier: %d\n",
 +                     __func__, ret);
 +              goto out;
 +      }
 +
 +      /* If no restore worker is running concurrently, user_pages
 +       * should not be allocated
 +       */
 +      WARN(mem->user_pages, "Leaking user_pages array");
 +
 +      mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
 +                                         sizeof(struct page *),
 +                                         GFP_KERNEL | __GFP_ZERO);
 +      if (!mem->user_pages) {
 +              pr_err("%s: Failed to allocate pages array\n", __func__);
 +              ret = -ENOMEM;
 +              goto unregister_out;
 +      }
 +
 +      ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
 +      if (ret) {
 +              pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
 +              goto free_out;
 +      }
 +
 +      amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
 +
 +      ret = amdgpu_bo_reserve(bo, true);
 +      if (ret) {
 +              pr_err("%s: Failed to reserve BO\n", __func__);
 +              goto release_out;
 +      }
 +      amdgpu_ttm_placement_from_domain(bo, mem->domain);
 +      ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 +      if (ret)
 +              pr_err("%s: failed to validate BO\n", __func__);
 +      amdgpu_bo_unreserve(bo);
 +
 +release_out:
 +      if (ret)
 +              release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
 +free_out:
 +      kvfree(mem->user_pages);
 +      mem->user_pages = NULL;
 +unregister_out:
 +      if (ret)
 +              amdgpu_mn_unregister(bo);
 +out:
 +      mutex_unlock(&process_info->lock);
 +      return ret;
 +}
 +
  /* Reserving a BO and its page table BOs must happen atomically to
   * avoid deadlocks. Some operations update multiple VMs at once. Track
   * all the reservation info in a context structure. Optionally a sync
@@@ -870,8 -748,7 +870,8 @@@ static int update_gpuvm_pte(struct amdg
  }
  
  static int map_bo_to_gpuvm(struct amdgpu_device *adev,
 -              struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
 +              struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
 +              bool no_update_pte)
  {
        int ret;
  
                return ret;
        }
  
 +      if (no_update_pte)
 +              return 0;
 +
        ret = update_gpuvm_pte(adev, entry, sync);
        if (ret) {
                pr_err("update_gpuvm_pte() failed\n");
@@@ -946,8 -820,6 +946,8 @@@ static int init_kfd_vm(struct amdgpu_v
                mutex_init(&info->lock);
                INIT_LIST_HEAD(&info->vm_list_head);
                INIT_LIST_HEAD(&info->kfd_bo_list);
 +              INIT_LIST_HEAD(&info->userptr_valid_list);
 +              INIT_LIST_HEAD(&info->userptr_inval_list);
  
                info->eviction_fence =
                        amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
                        goto create_evict_fence_fail;
                }
  
 +              info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
 +              atomic_set(&info->evicted_bos, 0);
 +              INIT_DELAYED_WORK(&info->restore_userptr_work,
 +                                amdgpu_amdkfd_restore_userptr_worker);
 +
                *process_info = info;
                *ef = dma_fence_get(&info->eviction_fence->base);
        }
@@@ -1005,7 -872,6 +1005,7 @@@ reserve_pd_fail
                dma_fence_put(*ef);
                *ef = NULL;
                *process_info = NULL;
 +              put_pid(info->pid);
  create_evict_fence_fail:
                mutex_destroy(&info->lock);
                kfree(info);
@@@ -1101,12 -967,8 +1101,12 @@@ void amdgpu_amdkfd_gpuvm_destroy_cb(str
        /* Release per-process resources when last compute VM is destroyed */
        if (!process_info->n_vms) {
                WARN_ON(!list_empty(&process_info->kfd_bo_list));
 +              WARN_ON(!list_empty(&process_info->userptr_valid_list));
 +              WARN_ON(!list_empty(&process_info->userptr_inval_list));
  
                dma_fence_put(&process_info->eviction_fence->base);
 +              cancel_delayed_work_sync(&process_info->restore_userptr_work);
 +              put_pid(process_info->pid);
                mutex_destroy(&process_info->lock);
                kfree(process_info);
        }
@@@ -1141,10 -1003,10 +1141,11 @@@ int amdgpu_amdkfd_gpuvm_alloc_memory_of
  {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 +      uint64_t user_addr = 0;
        struct amdgpu_bo *bo;
+       struct amdgpu_bo_param bp;
        int byte_align;
 -      u32 alloc_domain;
 +      u32 domain, alloc_domain;
        u64 alloc_flags;
        uint32_t mapping_flags;
        int ret;
         * Check on which domain to allocate BO
         */
        if (flags & ALLOC_MEM_FLAGS_VRAM) {
 -              alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
 +              domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
                alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
                alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
                        AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
        } else if (flags & ALLOC_MEM_FLAGS_GTT) {
 -              alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 +              domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
                alloc_flags = 0;
 +      } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
 +              domain = AMDGPU_GEM_DOMAIN_GTT;
 +              alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
 +              alloc_flags = 0;
 +              if (!offset || !*offset)
 +                      return -EINVAL;
 +              user_addr = *offset;
        } else {
                return -EINVAL;
        }
        pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
                        va, size, domain_string(alloc_domain));
  
-       ret = amdgpu_bo_create(adev, size, byte_align,
-                               alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
+       memset(&bp, 0, sizeof(bp));
+       bp.size = size;
+       bp.byte_align = byte_align;
+       bp.domain = alloc_domain;
+       bp.flags = alloc_flags;
+       bp.type = ttm_bo_type_device;
+       bp.resv = NULL;
+       ret = amdgpu_bo_create(adev, &bp, &bo);
        if (ret) {
                pr_debug("Failed to create BO on domain %s. ret %d\n",
                                domain_string(alloc_domain), ret);
        }
        bo->kfd_bo = *mem;
        (*mem)->bo = bo;
 +      if (user_addr)
 +              bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
  
        (*mem)->va = va;
 -      (*mem)->domain = alloc_domain;
 +      (*mem)->domain = domain;
        (*mem)->mapped_to_gpu_memory = 0;
        (*mem)->process_info = avm->process_info;
 -      add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
 +      add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 +
 +      if (user_addr) {
 +              ret = init_user_pages(*mem, current->mm, user_addr);
 +              if (ret) {
 +                      mutex_lock(&avm->process_info->lock);
 +                      list_del(&(*mem)->validate_list.head);
 +                      mutex_unlock(&avm->process_info->lock);
 +                      goto allocate_init_user_pages_failed;
 +              }
 +      }
  
        if (offset)
                *offset = amdgpu_bo_mmap_offset(bo);
  
        return 0;
  
 +allocate_init_user_pages_failed:
 +      amdgpu_bo_unref(&bo);
 +      /* Don't unreserve system mem limit twice */
 +      goto err_reserve_system_mem;
  err_bo_create:
        unreserve_system_mem_limit(adev, size, alloc_domain);
  err_reserve_system_mem:
@@@ -1284,24 -1129,12 +1291,24 @@@ int amdgpu_amdkfd_gpuvm_free_memory_of_
         * be freed anyway
         */
  
 +      /* No more MMU notifiers */
 +      amdgpu_mn_unregister(mem->bo);
 +
        /* Make sure restore workers don't access the BO any more */
        bo_list_entry = &mem->validate_list;
        mutex_lock(&process_info->lock);
        list_del(&bo_list_entry->head);
        mutex_unlock(&process_info->lock);
  
 +      /* Free user pages if necessary */
 +      if (mem->user_pages) {
 +              pr_debug("%s: Freeing user_pages array\n", __func__);
 +              if (mem->user_pages[0])
 +                      release_pages(mem->user_pages,
 +                                      mem->bo->tbo.ttm->num_pages);
 +              kvfree(mem->user_pages);
 +      }
 +
        ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
        if (unlikely(ret))
                return ret;
@@@ -1347,32 -1180,21 +1354,32 @@@ int amdgpu_amdkfd_gpuvm_map_memory_to_g
        struct kfd_bo_va_list *bo_va_entry = NULL;
        struct kfd_bo_va_list *bo_va_entry_aql = NULL;
        unsigned long bo_size;
 -
 -      /* Make sure restore is not running concurrently.
 -       */
 -      mutex_lock(&mem->process_info->lock);
 -
 -      mutex_lock(&mem->lock);
 +      bool is_invalid_userptr = false;
  
        bo = mem->bo;
 -
        if (!bo) {
                pr_err("Invalid BO when mapping memory to GPU\n");
 -              ret = -EINVAL;
 -              goto out;
 +              return -EINVAL;
 +      }
 +
 +      /* Make sure restore is not running concurrently. Since we
 +       * don't map invalid userptr BOs, we rely on the next restore
 +       * worker to do the mapping
 +       */
 +      mutex_lock(&mem->process_info->lock);
 +
 +      /* Lock mmap-sem. If we find an invalid userptr BO, we can be
 +       * sure that the MMU notifier is no longer running
 +       * concurrently and the queues are actually stopped
 +       */
 +      if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
 +              down_write(&current->mm->mmap_sem);
 +              is_invalid_userptr = atomic_read(&mem->invalid);
 +              up_write(&current->mm->mmap_sem);
        }
  
 +      mutex_lock(&mem->lock);
 +
        domain = mem->domain;
        bo_size = bo->tbo.mem.size;
  
        if (unlikely(ret))
                goto out;
  
 +      /* Userptr can be marked as "not invalid", but not actually be
 +       * validated yet (still in the system domain). In that case
 +       * the queues are still stopped and we can leave mapping for
 +       * the next restore worker
 +       */
 +      if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
 +              is_invalid_userptr = true;
 +
        if (check_if_add_bo_to_vm(avm, mem)) {
                ret = add_bo_to_vm(adev, mem, avm, false,
                                &bo_va_entry);
                        goto add_bo_to_vm_failed;
        }
  
 -      if (mem->mapped_to_gpu_memory == 0) {
 +      if (mem->mapped_to_gpu_memory == 0 &&
 +          !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
                /* Validate BO only once. The eviction fence gets added to BO
                 * the first time it is mapped. Validate will wait for all
                 * background evictions to complete.
                                        entry->va, entry->va + bo_size,
                                        entry);
  
 -                      ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
 +                      ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
 +                                            is_invalid_userptr);
                        if (ret) {
                                pr_err("Failed to map radeon bo to gpuvm\n");
                                goto map_bo_to_gpuvm_failed;
@@@ -1613,337 -1425,6 +1620,337 @@@ bo_reserve_failed
        return ret;
  }
  
 +/* Evict a userptr BO by stopping the queues if necessary
 + *
 + * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
 + * cannot do any memory allocations, and cannot take any locks that
 + * are held elsewhere while allocating memory. Therefore this is as
 + * simple as possible, using atomic counters.
 + *
 + * It doesn't do anything to the BO itself. The real work happens in
 + * restore, where we get updated page addresses. This function only
 + * ensures that GPU access to the BO is stopped.
 + */
 +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
 +                              struct mm_struct *mm)
 +{
 +      struct amdkfd_process_info *process_info = mem->process_info;
 +      int invalid, evicted_bos;
 +      int r = 0;
 +
 +      invalid = atomic_inc_return(&mem->invalid);
 +      evicted_bos = atomic_inc_return(&process_info->evicted_bos);
 +      if (evicted_bos == 1) {
 +              /* First eviction, stop the queues */
 +              r = kgd2kfd->quiesce_mm(mm);
 +              if (r)
 +                      pr_err("Failed to quiesce KFD\n");
 +              schedule_delayed_work(&process_info->restore_userptr_work,
 +                      msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 +      }
 +
 +      return r;
 +}
 +
 +/* Update invalid userptr BOs
 + *
 + * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
 + * userptr_inval_list and updates user pages for all BOs that have
 + * been invalidated since their last update.
 + */
 +static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 +                                   struct mm_struct *mm)
 +{
 +      struct kgd_mem *mem, *tmp_mem;
 +      struct amdgpu_bo *bo;
 +      struct ttm_operation_ctx ctx = { false, false };
 +      int invalid, ret;
 +
 +      /* Move all invalidated BOs to the userptr_inval_list and
 +       * release their user pages by migration to the CPU domain
 +       */
 +      list_for_each_entry_safe(mem, tmp_mem,
 +                               &process_info->userptr_valid_list,
 +                               validate_list.head) {
 +              if (!atomic_read(&mem->invalid))
 +                      continue; /* BO is still valid */
 +
 +              bo = mem->bo;
 +
 +              if (amdgpu_bo_reserve(bo, true))
 +                      return -EAGAIN;
 +              amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
 +              ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 +              amdgpu_bo_unreserve(bo);
 +              if (ret) {
 +                      pr_err("%s: Failed to invalidate userptr BO\n",
 +                             __func__);
 +                      return -EAGAIN;
 +              }
 +
 +              list_move_tail(&mem->validate_list.head,
 +                             &process_info->userptr_inval_list);
 +      }
 +
 +      if (list_empty(&process_info->userptr_inval_list))
 +              return 0; /* All evicted userptr BOs were freed */
 +
 +      /* Go through userptr_inval_list and update any invalid user_pages */
 +      list_for_each_entry(mem, &process_info->userptr_inval_list,
 +                          validate_list.head) {
 +              invalid = atomic_read(&mem->invalid);
 +              if (!invalid)
 +                      /* BO hasn't been invalidated since the last
 +                       * revalidation attempt. Keep its BO list.
 +                       */
 +                      continue;
 +
 +              bo = mem->bo;
 +
 +              if (!mem->user_pages) {
 +                      mem->user_pages =
 +                              kvmalloc_array(bo->tbo.ttm->num_pages,
 +                                               sizeof(struct page *),
 +                                               GFP_KERNEL | __GFP_ZERO);
 +                      if (!mem->user_pages) {
 +                              pr_err("%s: Failed to allocate pages array\n",
 +                                     __func__);
 +                              return -ENOMEM;
 +                      }
 +              } else if (mem->user_pages[0]) {
 +                      release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
 +              }
 +
 +              /* Get updated user pages */
 +              ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
 +                                                 mem->user_pages);
 +              if (ret) {
 +                      mem->user_pages[0] = NULL;
 +                      pr_info("%s: Failed to get user pages: %d\n",
 +                              __func__, ret);
 +                      /* Pretend it succeeded. It will fail later
 +                       * with a VM fault if the GPU tries to access
 +                       * it. Better than hanging indefinitely with
 +                       * stalled user mode queues.
 +                       */
 +              }
 +
 +              /* Mark the BO as valid unless it was invalidated
 +               * again concurrently
 +               */
 +              if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
 +                      return -EAGAIN;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Validate invalid userptr BOs
 + *
 + * Validates BOs on the userptr_inval_list, and moves them back to the
 + * userptr_valid_list. Also updates GPUVM page tables with new page
 + * addresses and waits for the page table updates to complete.
 + */
 +static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 +{
 +      struct amdgpu_bo_list_entry *pd_bo_list_entries;
 +      struct list_head resv_list, duplicates;
 +      struct ww_acquire_ctx ticket;
 +      struct amdgpu_sync sync;
 +
 +      struct amdgpu_vm *peer_vm;
 +      struct kgd_mem *mem, *tmp_mem;
 +      struct amdgpu_bo *bo;
 +      struct ttm_operation_ctx ctx = { false, false };
 +      int i, ret;
 +
 +      pd_bo_list_entries = kcalloc(process_info->n_vms,
 +                                   sizeof(struct amdgpu_bo_list_entry),
 +                                   GFP_KERNEL);
 +      if (!pd_bo_list_entries) {
 +              pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
 +              return -ENOMEM;
 +      }
 +
 +      INIT_LIST_HEAD(&resv_list);
 +      INIT_LIST_HEAD(&duplicates);
 +
 +      /* Get all the page directory BOs that need to be reserved */
 +      i = 0;
 +      list_for_each_entry(peer_vm, &process_info->vm_list_head,
 +                          vm_list_node)
 +              amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
 +                                  &pd_bo_list_entries[i++]);
 +      /* Add the userptr_inval_list entries to resv_list */
 +      list_for_each_entry(mem, &process_info->userptr_inval_list,
 +                          validate_list.head) {
 +              list_add_tail(&mem->resv_list.head, &resv_list);
 +              mem->resv_list.bo = mem->validate_list.bo;
 +              mem->resv_list.shared = mem->validate_list.shared;
 +      }
 +
 +      /* Reserve all BOs and page tables for validation */
 +      ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
 +      WARN(!list_empty(&duplicates), "Duplicates should be empty");
 +      if (ret)
 +              goto out;
 +
 +      amdgpu_sync_create(&sync);
 +
 +      /* Avoid triggering eviction fences when unmapping invalid
 +       * userptr BOs (waits for all fences, doesn't use
 +       * FENCE_OWNER_VM)
 +       */
 +      list_for_each_entry(peer_vm, &process_info->vm_list_head,
 +                          vm_list_node)
 +              amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
 +                                              process_info->eviction_fence,
 +                                              NULL, NULL);
 +
 +      ret = process_validate_vms(process_info);
 +      if (ret)
 +              goto unreserve_out;
 +
 +      /* Validate BOs and update GPUVM page tables */
 +      list_for_each_entry_safe(mem, tmp_mem,
 +                               &process_info->userptr_inval_list,
 +                               validate_list.head) {
 +              struct kfd_bo_va_list *bo_va_entry;
 +
 +              bo = mem->bo;
 +
 +              /* Copy pages array and validate the BO if we got user pages */
 +              if (mem->user_pages[0]) {
 +                      amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
 +                                                   mem->user_pages);
 +                      amdgpu_ttm_placement_from_domain(bo, mem->domain);
 +                      ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 +                      if (ret) {
 +                              pr_err("%s: failed to validate BO\n", __func__);
 +                              goto unreserve_out;
 +                      }
 +              }
 +
 +              /* Validate succeeded, now the BO owns the pages, free
 +               * our copy of the pointer array. Put this BO back on
 +               * the userptr_valid_list. If we need to revalidate
 +               * it, we need to start from scratch.
 +               */
 +              kvfree(mem->user_pages);
 +              mem->user_pages = NULL;
 +              list_move_tail(&mem->validate_list.head,
 +                             &process_info->userptr_valid_list);
 +
 +              /* Update mapping. If the BO was not validated
 +               * (because we couldn't get user pages), this will
 +               * clear the page table entries, which will result in
 +               * VM faults if the GPU tries to access the invalid
 +               * memory.
 +               */
 +              list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
 +                      if (!bo_va_entry->is_mapped)
 +                              continue;
 +
 +                      ret = update_gpuvm_pte((struct amdgpu_device *)
 +                                             bo_va_entry->kgd_dev,
 +                                             bo_va_entry, &sync);
 +                      if (ret) {
 +                              pr_err("%s: update PTE failed\n", __func__);
 +                              /* make sure this gets validated again */
 +                              atomic_inc(&mem->invalid);
 +                              goto unreserve_out;
 +                      }
 +              }
 +      }
 +
 +      /* Update page directories */
 +      ret = process_update_pds(process_info, &sync);
 +
 +unreserve_out:
 +      list_for_each_entry(peer_vm, &process_info->vm_list_head,
 +                          vm_list_node)
 +              amdgpu_bo_fence(peer_vm->root.base.bo,
 +                              &process_info->eviction_fence->base, true);
 +      ttm_eu_backoff_reservation(&ticket, &resv_list);
 +      amdgpu_sync_wait(&sync, false);
 +      amdgpu_sync_free(&sync);
 +out:
 +      kfree(pd_bo_list_entries);
 +
 +      return ret;
 +}
 +
 +/* Worker callback to restore evicted userptr BOs
 + *
 + * Tries to update and validate all userptr BOs. If successful and no
 + * concurrent evictions happened, the queues are restarted. Otherwise,
 + * reschedule for another attempt later.
 + */
 +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 +{
 +      struct delayed_work *dwork = to_delayed_work(work);
 +      struct amdkfd_process_info *process_info =
 +              container_of(dwork, struct amdkfd_process_info,
 +                           restore_userptr_work);
 +      struct task_struct *usertask;
 +      struct mm_struct *mm;
 +      int evicted_bos;
 +
 +      evicted_bos = atomic_read(&process_info->evicted_bos);
 +      if (!evicted_bos)
 +              return;
 +
 +      /* Reference task and mm in case of concurrent process termination */
 +      usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
 +      if (!usertask)
 +              return;
 +      mm = get_task_mm(usertask);
 +      if (!mm) {
 +              put_task_struct(usertask);
 +              return;
 +      }
 +
 +      mutex_lock(&process_info->lock);
 +
 +      if (update_invalid_user_pages(process_info, mm))
 +              goto unlock_out;
 +      /* userptr_inval_list can be empty if all evicted userptr BOs
 +       * have been freed. In that case there is nothing to validate
 +       * and we can just restart the queues.
 +       */
 +      if (!list_empty(&process_info->userptr_inval_list)) {
 +              if (atomic_read(&process_info->evicted_bos) != evicted_bos)
 +                      goto unlock_out; /* Concurrent eviction, try again */
 +
 +              if (validate_invalid_user_pages(process_info))
 +                      goto unlock_out;
 +      }
 +      /* Final check for concurrent eviction and atomic update. If
 +       * another eviction happens after successful update, it will
 +       * be a first eviction that calls quiesce_mm. The eviction
 +       * reference counting inside KFD will handle this case.
 +       */
 +      if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
 +          evicted_bos)
 +              goto unlock_out;
 +      evicted_bos = 0;
 +      if (kgd2kfd->resume_mm(mm)) {
 +              pr_err("%s: Failed to resume KFD\n", __func__);
 +              /* No recovery from this failure. Probably the CP is
 +               * hanging. No point trying again.
 +               */
 +      }
 +unlock_out:
 +      mutex_unlock(&process_info->lock);
 +      mmput(mm);
 +      put_task_struct(usertask);
 +
 +      /* If validation failed, reschedule another attempt */
 +      if (evicted_bos)
 +              schedule_delayed_work(&process_info->restore_userptr_work,
 +                      msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 +}
 +
  /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
   *   KFD process identified by process_info
   *
index 8e66f3702b7cf748c0a8e336255fdcde945ddfd3,e1756b68a17beda71956555e07b5b8595f786e27..9c1d491d742e095c24b9cff843cb9830c3d67753
@@@ -382,8 -382,7 +382,7 @@@ retry
  
        p->bytes_moved += ctx.bytes_moved;
        if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-           bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-           bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
+           amdgpu_bo_in_cpu_visible_vram(bo))
                p->bytes_moved_vis += ctx.bytes_moved;
  
        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@@ -411,7 -410,6 +410,6 @@@ static bool amdgpu_cs_try_evict(struct 
                struct amdgpu_bo_list_entry *candidate = p->evictable;
                struct amdgpu_bo *bo = candidate->robj;
                struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-               u64 initial_bytes_moved, bytes_moved;
                bool update_bytes_moved_vis;
                uint32_t other;
  
                        continue;
  
                /* Good we can try to move this BO somewhere else */
-               amdgpu_ttm_placement_from_domain(bo, other);
                update_bytes_moved_vis =
                        adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-                       bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-                       bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
-               initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
+                       amdgpu_bo_in_cpu_visible_vram(bo);
+               amdgpu_ttm_placement_from_domain(bo, other);
                r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-               bytes_moved = atomic64_read(&adev->num_bytes_moved) -
-                       initial_bytes_moved;
-               p->bytes_moved += bytes_moved;
+               p->bytes_moved += ctx.bytes_moved;
                if (update_bytes_moved_vis)
-                       p->bytes_moved_vis += bytes_moved;
+                       p->bytes_moved_vis += ctx.bytes_moved;
  
                if (unlikely(r))
                        break;
@@@ -536,7 -530,7 +530,7 @@@ static int amdgpu_cs_parser_bos(struct 
        if (p->bo_list) {
                amdgpu_bo_list_get_list(p->bo_list, &p->validated);
                if (p->bo_list->first_userptr != p->bo_list->num_entries)
 -                      p->mn = amdgpu_mn_get(p->adev);
 +                      p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
        }
  
        INIT_LIST_HEAD(&duplicates);
index c713d30cba86854accffcacddefb2218ec319aaf,5e9fd256faada5ad98f8ca4c0e8b083a52322df1..69a2b25b3696e9505a468d8c57509ba99351caa1
@@@ -111,7 -111,7 +111,7 @@@ static int amdgpu_ttm_global_init(struc
        ring = adev->mman.buffer_funcs_ring;
        rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
        r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
-                                 rq, amdgpu_sched_jobs, NULL);
+                                 rq, NULL);
        if (r) {
                DRM_ERROR("Failed setting up TTM BO move run queue.\n");
                goto error_entity;
@@@ -223,20 -223,8 +223,8 @@@ static void amdgpu_evict_flags(struct t
                if (!adev->mman.buffer_funcs_enabled) {
                        amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
                } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-                          !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-                       unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
-                       struct drm_mm_node *node = bo->mem.mm_node;
-                       unsigned long pages_left;
-                       for (pages_left = bo->mem.num_pages;
-                            pages_left;
-                            pages_left -= node->size, node++) {
-                               if (node->start < fpfn)
-                                       break;
-                       }
-                       if (!pages_left)
-                               goto gtt;
+                          !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+                          amdgpu_bo_in_cpu_visible_vram(abo)) {
  
                        /* Try evicting to the CPU inaccessible part of VRAM
                         * first, but only set GTT as busy placement, so this
                         */
                        amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
                                                         AMDGPU_GEM_DOMAIN_GTT);
-                       abo->placements[0].fpfn = fpfn;
+                       abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
                        abo->placements[0].lpfn = 0;
                        abo->placement.busy_placement = &abo->placements[1];
                        abo->placement.num_busy_placement = 1;
                } else {
- gtt:
                        amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
                }
                break;
@@@ -695,7 -682,7 +682,7 @@@ struct amdgpu_ttm_tt 
        struct ttm_dma_tt       ttm;
        u64                     offset;
        uint64_t                userptr;
 -      struct mm_struct        *usermm;
 +      struct task_struct      *usertask;
        uint32_t                userflags;
        spinlock_t              guptasklock;
        struct list_head        guptasks;
  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
  {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 +      struct mm_struct *mm = gtt->usertask->mm;
        unsigned int flags = 0;
        unsigned pinned = 0;
        int r;
  
 +      if (!mm) /* Happens during process shutdown */
 +              return -ESRCH;
 +
        if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
                flags |= FOLL_WRITE;
  
 -      down_read(&current->mm->mmap_sem);
 +      down_read(&mm->mmap_sem);
  
        if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
                /* check that we only use anonymous memory
                unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
                struct vm_area_struct *vma;
  
 -              vma = find_vma(gtt->usermm, gtt->userptr);
 +              vma = find_vma(mm, gtt->userptr);
                if (!vma || vma->vm_file || vma->vm_end < end) {
 -                      up_read(&current->mm->mmap_sem);
 +                      up_read(&mm->mmap_sem);
                        return -EPERM;
                }
        }
                list_add(&guptask.list, &gtt->guptasks);
                spin_unlock(&gtt->guptasklock);
  
 -              r = get_user_pages(userptr, num_pages, flags, p, NULL);
 +              if (mm == current->mm)
 +                      r = get_user_pages(userptr, num_pages, flags, p, NULL);
 +              else
 +                      r = get_user_pages_remote(gtt->usertask,
 +                                      mm, userptr, num_pages,
 +                                      flags, p, NULL, NULL);
  
                spin_lock(&gtt->guptasklock);
                list_del(&guptask.list);
  
        } while (pinned < ttm->num_pages);
  
 -      up_read(&current->mm->mmap_sem);
 +      up_read(&mm->mmap_sem);
        return 0;
  
  release_pages:
        release_pages(pages, pinned);
 -      up_read(&current->mm->mmap_sem);
 +      up_read(&mm->mmap_sem);
        return r;
  }
  
@@@ -856,6 -834,45 +843,45 @@@ static void amdgpu_ttm_tt_unpin_userptr
        sg_free_table(ttm->sg);
  }
  
+ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+                               struct ttm_buffer_object *tbo,
+                               uint64_t flags)
+ {
+       struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
+       struct ttm_tt *ttm = tbo->ttm;
+       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       int r;
+       if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
+               uint64_t page_idx = 1;
+               r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
+                               ttm->pages, gtt->ttm.dma_address, flags);
+               if (r)
+                       goto gart_bind_fail;
+               /* Patch mtype of the second part BO */
+               flags &=  ~AMDGPU_PTE_MTYPE_MASK;
+               flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+               r = amdgpu_gart_bind(adev,
+                               gtt->offset + (page_idx << PAGE_SHIFT),
+                               ttm->num_pages - page_idx,
+                               &ttm->pages[page_idx],
+                               &(gtt->ttm.dma_address[page_idx]), flags);
+       } else {
+               r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+                                    ttm->pages, gtt->ttm.dma_address, flags);
+       }
+ gart_bind_fail:
+       if (r)
+               DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+                         ttm->num_pages, gtt->offset);
+       return r;
+ }
  static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
                                   struct ttm_mem_reg *bo_mem)
  {
@@@ -929,8 -946,7 +955,7 @@@ int amdgpu_ttm_alloc_gart(struct ttm_bu
  
        flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
        gtt->offset = (u64)tmp.start << PAGE_SHIFT;
-       r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages,
-                            bo->ttm->pages, gtt->ttm.dma_address, flags);
+       r = amdgpu_ttm_gart_bind(adev, bo, flags);
        if (unlikely(r)) {
                ttm_bo_mem_put(bo, &tmp);
                return r;
  int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
  {
        struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
-       struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
        uint64_t flags;
        int r;
  
-       if (!gtt)
+       if (!tbo->ttm)
                return 0;
  
-       flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem);
-       r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
-                            gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags);
-       if (r)
-               DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
-                         gtt->ttm.ttm.num_pages, gtt->offset);
+       flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
+       r = amdgpu_ttm_gart_bind(adev, tbo, flags);
        return r;
  }
  
@@@ -987,9 -999,6 +1008,9 @@@ static void amdgpu_ttm_backend_destroy(
  {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
  
 +      if (gtt->usertask)
 +              put_task_struct(gtt->usertask);
 +
        ttm_dma_tt_fini(&gtt->ttm);
        kfree(gtt);
  }
@@@ -1091,13 -1100,8 +1112,13 @@@ int amdgpu_ttm_tt_set_userptr(struct tt
                return -EINVAL;
  
        gtt->userptr = addr;
 -      gtt->usermm = current->mm;
        gtt->userflags = flags;
 +
 +      if (gtt->usertask)
 +              put_task_struct(gtt->usertask);
 +      gtt->usertask = current->group_leader;
 +      get_task_struct(gtt->usertask);
 +
        spin_lock_init(&gtt->guptasklock);
        INIT_LIST_HEAD(&gtt->guptasks);
        atomic_set(&gtt->mmu_invalidations, 0);
@@@ -1113,10 -1117,7 +1134,10 @@@ struct mm_struct *amdgpu_ttm_tt_get_use
        if (gtt == NULL)
                return NULL;
  
 -      return gtt->usermm;
 +      if (gtt->usertask == NULL)
 +              return NULL;
 +
 +      return gtt->usertask->mm;
  }
  
  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
@@@ -1349,6 -1350,7 +1370,7 @@@ static void amdgpu_ttm_fw_reserve_vram_
  static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
  {
        struct ttm_operation_ctx ctx = { false, false };
+       struct amdgpu_bo_param bp;
        int r = 0;
        int i;
        u64 vram_size = adev->gmc.visible_vram_size;
        u64 size = adev->fw_vram_usage.size;
        struct amdgpu_bo *bo;
  
+       memset(&bp, 0, sizeof(bp));
+       bp.size = adev->fw_vram_usage.size;
+       bp.byte_align = PAGE_SIZE;
+       bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+       bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+               AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+       bp.type = ttm_bo_type_kernel;
+       bp.resv = NULL;
        adev->fw_vram_usage.va = NULL;
        adev->fw_vram_usage.reserved_bo = NULL;
  
        if (adev->fw_vram_usage.size > 0 &&
                adev->fw_vram_usage.size <= vram_size) {
  
-               r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
-                                    AMDGPU_GEM_DOMAIN_VRAM,
-                                    AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-                                    AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-                                    ttm_bo_type_kernel, NULL,
+               r = amdgpu_bo_create(adev, &bp,
                                     &adev->fw_vram_usage.reserved_bo);
                if (r)
                        goto error_create;
@@@ -1474,12 -1480,14 +1500,14 @@@ int amdgpu_ttm_init(struct amdgpu_devic
                return r;
        }
  
-       r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
-                                   AMDGPU_GEM_DOMAIN_VRAM,
-                                   &adev->stolen_vga_memory,
-                                   NULL, NULL);
-       if (r)
-               return r;
+       if (adev->gmc.stolen_size) {
+               r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
+                                           AMDGPU_GEM_DOMAIN_VRAM,
+                                           &adev->stolen_vga_memory,
+                                           NULL, NULL);
+               if (r)
+                       return r;
+       }
        DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
                 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
  
        return 0;
  }
  
+ void amdgpu_ttm_late_init(struct amdgpu_device *adev)
+ {
+       amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+ }
  void amdgpu_ttm_fini(struct amdgpu_device *adev)
  {
        if (!adev->mman.initialized)
                return;
  
        amdgpu_ttm_debugfs_fini(adev);
-       amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
        amdgpu_ttm_fw_reserve_vram_fini(adev);
        if (adev->mman.aper_base_kaddr)
                iounmap(adev->mman.aper_base_kaddr);
index 5916cc25e28be5e8b2497e5b663da49f6c4b03d9,ee71c40b392061631fb1976f80f1fafdf1b7add1..75592bd04d6ad65fb1c08cd6e020b0492f7d2af7
@@@ -161,8 -161,38 +161,38 @@@ void amdgpu_ucode_print_rlc_hdr(const s
                          le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
                DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
                          le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
-               DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
-                         le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+               DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+                         le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+               if (version_minor == 1) {
+                       const struct rlc_firmware_header_v2_1 *v2_1 =
+                               container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+                       DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
+                                 le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
+                       DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
+                       DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
+                       DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
+                       DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
+                       DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
+                       DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
+                       DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
+                       DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
+                       DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
+                       DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
+                       DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
+                       DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
+               }
        } else {
                DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
        }
@@@ -265,6 -295,7 +295,7 @@@ amdgpu_ucode_get_load_type(struct amdgp
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
+       case CHIP_VEGAM:
                if (!load_type)
                        return AMDGPU_FW_LOAD_DIRECT;
                else
                else
                        return AMDGPU_FW_LOAD_PSP;
        default:
 -              DRM_ERROR("Unknow firmware load type\n");
 +              DRM_ERROR("Unknown firmware load type\n");
        }
  
        return AMDGPU_FW_LOAD_DIRECT;
@@@ -307,7 -338,10 +338,10 @@@ static int amdgpu_ucode_init_single_fw(
            (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
             ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
             ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
-            ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) {
+            ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
+            ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
+            ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
+            ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
                ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
  
                memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
                                              le32_to_cpu(header->ucode_array_offset_bytes) +
                                              le32_to_cpu(cp_hdr->jt_offset) * 4),
                       ucode->ucode_size);
+       } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
+               ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
+               memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
+                      ucode->ucode_size);
+       } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) {
+               ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
+               memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm,
+                      ucode->ucode_size);
+       } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+               ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
+               memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
+                      ucode->ucode_size);
        }
  
        return 0;
index e5962e61beb57051bc4b1cc4efa76af1e6777ff6,2c5e2a41632e97779f543b75382e4ea516282084..fc1911834ab582ce5972b4814399fbd72a5c95c0
@@@ -41,7 -41,6 +41,6 @@@
  #define GFX9_MEC_HPD_SIZE 2048
  #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
  #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
- #define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
  
  #define mmPWR_MISC_CNTL_STATUS                                        0x0183
  #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                               0
@@@ -185,6 -184,30 +184,30 @@@ static const struct soc15_reg_golden go
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
  };
  
+ static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
+ {
+       mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ };
+ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
+ {
+       mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ };
  #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
  #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
  #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@@ -401,6 -424,27 +424,27 @@@ static void gfx_v9_0_free_microcode(str
        kfree(adev->gfx.rlc.register_list_format);
  }
  
+ static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
+ {
+       const struct rlc_firmware_header_v2_1 *rlc_hdr;
+       rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+       adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+       adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+       adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+       adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+       adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+       adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+       adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+       adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+       adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+       adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+       adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+       adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+       adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+                       le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+ }
  static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
  {
        const char *chip_name;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL;
        unsigned int i = 0;
+       uint16_t version_major;
+       uint16_t version_minor;
  
        DRM_DEBUG("\n");
  
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+       version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+       version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+       if (version_major == 2 && version_minor == 1)
+               adev->gfx.rlc.is_rlc_v2_1 = true;
        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
        adev->gfx.rlc.save_and_restore_offset =
        for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
  
+       if (adev->gfx.rlc.is_rlc_v2_1)
+               gfx_v9_0_init_rlc_ext_microcode(adev);
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
  
+               if (adev->gfx.rlc.is_rlc_v2_1) {
+                       info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+                       info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+                       info->fw = adev->gfx.rlc_fw;
+                       adev->firmware.fw_size +=
+                               ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+                       info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+                       info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+                       info->fw = adev->gfx.rlc_fw;
+                       adev->firmware.fw_size +=
+                               ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+                       info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+                       info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+                       info->fw = adev->gfx.rlc_fw;
+                       adev->firmware.fw_size +=
+                               ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+               }
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
@@@ -1600,6 -1675,7 +1675,7 @@@ static void gfx_v9_0_gpu_init(struct am
  
        gfx_v9_0_setup_rb(adev);
        gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+       adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
  
        /* XXX SH_MEM regs */
        /* where to put LDS, scratch, GPUVM in FSA64 space */
                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
                        WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
-                       tmp = adev->gmc.shared_aperture_start >> 48;
+                       tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+                               (adev->gmc.private_aperture_start >> 48));
+                       tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+                               (adev->gmc.shared_aperture_start >> 48));
                        WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
                }
        }
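
The SH_MEM_BASES programming above now packs both aperture bases into the register with REG_SET_FIELD() instead of writing only the shared base. As a rough illustration of the read-modify-write field-packing idiom, here is a user-space sketch; the field positions and the simplified macro are assumptions for illustration, not the real SH_MEM_BASES layout or the kernel's REG_SET_FIELD() signature:

/*
 * Sketch of the field-packing idiom.  Hypothetical layout:
 * PRIVATE_BASE in bits [15:0], SHARED_BASE in bits [31:16].
 */
#include <stdint.h>
#include <stdio.h>

#define PRIVATE_BASE_SHIFT	0
#define PRIVATE_BASE_MASK	0x0000FFFFu
#define SHARED_BASE_SHIFT	16
#define SHARED_BASE_MASK	0xFFFF0000u

/* simplified stand-in for the kernel's REG_SET_FIELD() */
#define REG_SET_FIELD(reg, field, val) \
	(((reg) & ~field##_MASK) | (((val) << field##_SHIFT) & field##_MASK))

int main(void)
{
	uint64_t private_aperture_start = 0x1000ULL << 48;
	uint64_t shared_aperture_start  = 0x2000ULL << 48;
	uint32_t tmp;

	tmp = REG_SET_FIELD(0, PRIVATE_BASE,
			    (uint32_t)(private_aperture_start >> 48));
	tmp = REG_SET_FIELD(tmp, SHARED_BASE,
			    (uint32_t)(shared_aperture_start >> 48));

	printf("SH_MEM_BASES = 0x%08x\n", tmp);
	return 0;
}
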
@@@ -1708,55 -1787,42 +1787,42 @@@ static void gfx_v9_0_init_csb(struct am
                        adev->gfx.rlc.clear_state_size);
  }
  
- static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
+ static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
                                int indirect_offset,
                                int list_size,
                                int *unique_indirect_regs,
                                int *unique_indirect_reg_count,
-                               int max_indirect_reg_count,
                                int *indirect_start_offsets,
-                               int *indirect_start_offsets_count,
-                               int max_indirect_start_offsets_count)
+                               int *indirect_start_offsets_count)
  {
        int idx;
-       bool new_entry = true;
  
        for (; indirect_offset < list_size; indirect_offset++) {
+               indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+               *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
  
-               if (new_entry) {
-                       new_entry = false;
-                       indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
-                       *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
-                       BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
-               }
+               while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
+                       indirect_offset += 2;
  
-               if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
-                       new_entry = true;
-                       continue;
-               }
+                       /* look for the matching index */
+                       for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
+                               if (unique_indirect_regs[idx] ==
+                                       register_list_format[indirect_offset] ||
+                                       !unique_indirect_regs[idx])
+                                       break;
+                       }
  
-               indirect_offset += 2;
+                       BUG_ON(idx >= *unique_indirect_reg_count);
  
-               /* look for the matching indice */
-               for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
-                       if (unique_indirect_regs[idx] ==
-                               register_list_format[indirect_offset])
-                               break;
-               }
+                       if (!unique_indirect_regs[idx])
+                               unique_indirect_regs[idx] = register_list_format[indirect_offset];
  
-               if (idx >= *unique_indirect_reg_count) {
-                       unique_indirect_regs[*unique_indirect_reg_count] =
-                               register_list_format[indirect_offset];
-                       idx = *unique_indirect_reg_count;
-                       *unique_indirect_reg_count = *unique_indirect_reg_count + 1;
-                       BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
+                       indirect_offset++;
                }
-               register_list_format[indirect_offset] = idx;
        }
  }
  
- static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
+ static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
  {
        int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
        int unique_indirect_reg_count = 0;
        int indirect_start_offsets_count = 0;
  
        int list_size = 0;
-       int i = 0;
+       int i = 0, j = 0;
        u32 tmp = 0;
  
        u32 *register_list_format =
                adev->gfx.rlc.reg_list_format_size_bytes);
  
        /* setup unique_indirect_regs array and indirect_start_offsets array */
-       gfx_v9_0_parse_ind_reg_list(register_list_format,
-                               GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
-                               adev->gfx.rlc.reg_list_format_size_bytes >> 2,
-                               unique_indirect_regs,
-                               &unique_indirect_reg_count,
-                               ARRAY_SIZE(unique_indirect_regs),
-                               indirect_start_offsets,
-                               &indirect_start_offsets_count,
-                               ARRAY_SIZE(indirect_start_offsets));
+       unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
+       gfx_v9_1_parse_ind_reg_list(register_list_format,
+                                   adev->gfx.rlc.reg_list_format_direct_reg_list_length,
+                                   adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+                                   unique_indirect_regs,
+                                   &unique_indirect_reg_count,
+                                   indirect_start_offsets,
+                                   &indirect_start_offsets_count);
  
        /* enable auto inc in case it is disabled */
        tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
                        adev->gfx.rlc.register_restore[i]);
  
-       /* load direct register */
-       WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
-       for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
-               WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
-                       adev->gfx.rlc.register_restore[i]);
        /* load indirect register */
        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
                adev->gfx.rlc.reg_list_format_start);
-       for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
+       /* direct register portion */
+       for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
                        register_list_format[i]);
  
+       /* indirect register portion */
+       while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
+               if (register_list_format[i] == 0xFFFFFFFF) {
+                       WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+                       continue;
+               }
+               WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+               WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+               for (j = 0; j < unique_indirect_reg_count; j++) {
+                       if (register_list_format[i] == unique_indirect_regs[j]) {
+                               WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
+                               break;
+                       }
+               }
+               BUG_ON(j >= unique_indirect_reg_count);
+               i++;
+       }
        /* set save/restore list size */
        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
        list_size = list_size >> 1;
                adev->gfx.rlc.starting_offsets_start);
        for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
-                       indirect_start_offsets[i]);
+                      indirect_start_offsets[i]);
  
        /* load unique indirect regs*/
        for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
-               WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
-                       unique_indirect_regs[i] & 0x3FFFF);
-               WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
-                       unique_indirect_regs[i] >> 20);
+               if (unique_indirect_regs[i] != 0) {
+                       WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
+                              + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
+                              unique_indirect_regs[i] & 0x3FFFF);
+                       WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
+                              + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
+                              unique_indirect_regs[i] >> 20);
+               }
        }
  
        kfree(register_list_format);
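
The save/restore list that gfx_v9_1_parse_ind_reg_list() and the loader loop above walk has, as far as the driver code shows, a direct portion of reg_list_format_direct_reg_list_length dwords followed by indirect records of two header dwords plus one indirect register address, with each indirect block terminated by 0xFFFFFFFF. The following stand-alone approximation of the parse step collects the unique indirect registers and the block start offsets under that assumption (the example data and array sizes are made up):

/*
 * Stand-alone approximation of the indirect-list walk; the record layout
 * (2 dwords + register address, 0xFFFFFFFF terminator) is inferred from
 * the driver code above.
 */
#include <stdint.h>
#include <stdio.h>

#define LIST_END 0xFFFFFFFFu

static void parse_ind_reg_list(const uint32_t *fmt, int start, int size,
			       uint32_t *uniq, int *uniq_count, int uniq_max,
			       int *starts, int *start_count)
{
	int i, j;

	for (i = start; i < size; i++) {
		starts[(*start_count)++] = i;	/* record block start */

		while (fmt[i] != LIST_END) {
			i += 2;			/* skip the two header dwords */

			/* remember this indirect register if it is new */
			for (j = 0; j < *uniq_count; j++)
				if (uniq[j] == fmt[i])
					break;
			if (j == *uniq_count && *uniq_count < uniq_max)
				uniq[(*uniq_count)++] = fmt[i];

			i++;			/* advance past the register */
		}
	}
}

int main(void)
{
	/* two blocks: (1,2,0x100)(3,4,0x200) END, (5,6,0x100) END */
	const uint32_t fmt[] = { 1, 2, 0x100, 3, 4, 0x200, LIST_END,
				 5, 6, 0x100, LIST_END };
	uint32_t uniq[8] = { 0 };
	int starts[8] = { 0 }, nuniq = 0, nstart = 0, i;

	parse_ind_reg_list(fmt, 0, 11, uniq, &nuniq, 8, starts, &nstart);

	for (i = 0; i < nuniq; i++)
		printf("indirect reg 0x%x\n", uniq[i]);
	for (i = 0; i < nstart; i++)
		printf("block starts at dword %d\n", starts[i]);
	return 0;
}
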
@@@ -2010,6 -2098,9 +2098,9 @@@ static void gfx_v9_0_enable_gfx_dynamic
  
  static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
  {
+       if (!adev->gfx.rlc.is_rlc_v2_1)
+               return;
        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
                              AMD_PG_SUPPORT_GFX_SMG |
                              AMD_PG_SUPPORT_GFX_DMG |
                              AMD_PG_SUPPORT_GDS |
                              AMD_PG_SUPPORT_RLC_SMU_HS)) {
                gfx_v9_0_init_csb(adev);
-               gfx_v9_0_init_rlc_save_restore_list(adev);
+               gfx_v9_1_init_rlc_save_restore_list(adev);
                gfx_v9_0_enable_save_restore_machine(adev);
  
-               if (adev->asic_type == CHIP_RAVEN) {
-                       WREG32(mmRLC_JUMP_TABLE_RESTORE,
-                               adev->gfx.rlc.cp_table_gpu_addr >> 8);
-                       gfx_v9_0_init_gfx_power_gating(adev);
-                       if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
-                               gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
-                               gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
-                       } else {
-                               gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
-                               gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
-                       }
-                       if (adev->pg_flags & AMD_PG_SUPPORT_CP)
-                               gfx_v9_0_enable_cp_power_gating(adev, true);
-                       else
-                               gfx_v9_0_enable_cp_power_gating(adev, false);
-               }
+               WREG32(mmRLC_JUMP_TABLE_RESTORE,
+                      adev->gfx.rlc.cp_table_gpu_addr >> 8);
+               gfx_v9_0_init_gfx_power_gating(adev);
        }
  }
  
@@@ -3061,6 -3137,9 +3137,9 @@@ static int gfx_v9_0_hw_fini(void *handl
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;
  
+       amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+                                              AMD_PG_STATE_UNGATE);
        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
  
@@@ -3279,6 -3358,11 +3358,11 @@@ static int gfx_v9_0_late_init(void *han
        if (r)
                return r;
  
+       r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+                                                  AMD_PG_STATE_GATE);
+       if (r)
+               return r;
        return 0;
  }
  
@@@ -3339,8 -3423,7 +3423,7 @@@ static void gfx_v9_0_exit_rlc_safe_mode
  static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
                                                bool enable)
  {
-       /* TODO: double check if we need to perform under safe mdoe */
-       /* gfx_v9_0_enter_rlc_safe_mode(adev); */
+       gfx_v9_0_enter_rlc_safe_mode(adev);
  
        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
                gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
                gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
        }
  
-       /* gfx_v9_0_exit_rlc_safe_mode(adev); */
+       gfx_v9_0_exit_rlc_safe_mode(adev);
  }
  
  static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@@ -3742,7 -3825,7 +3825,7 @@@ static void gfx_v9_0_ring_emit_ib_gfx(s
        }
  
        amdgpu_ring_write(ring, header);
- BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
      BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
        amdgpu_ring_write(ring,
  #ifdef __BIG_ENDIAN
                (2 << 0) |
@@@ -3774,13 -3857,16 +3857,16 @@@ static void gfx_v9_0_ring_emit_fence(st
  {
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+       bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
  
        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-       amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
-                                EOP_TC_ACTION_EN |
-                                EOP_TC_WB_ACTION_EN |
-                                EOP_TC_MD_ACTION_EN |
+       amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+                                              EOP_TC_NC_ACTION_EN) :
+                                             (EOP_TCL1_ACTION_EN |
+                                              EOP_TC_ACTION_EN |
+                                              EOP_TC_WB_ACTION_EN |
+                                              EOP_TC_MD_ACTION_EN)) |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@@ -4137,6 -4223,20 +4223,20 @@@ static void gfx_v9_0_ring_emit_reg_wait
        gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
  }
  
+ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+                                                 uint32_t reg0, uint32_t reg1,
+                                                 uint32_t ref, uint32_t mask)
+ {
+       int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+       if (amdgpu_sriov_vf(ring->adev))
+               gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+                                     ref, mask, 0x20);
+       else
+               amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+                                                          ref, mask);
+ }
  static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state state)
  {
@@@ -4458,6 -4558,7 +4558,7 @@@ static const struct amdgpu_ring_funcs g
        .emit_tmz = gfx_v9_0_ring_emit_tmz,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+       .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
  };
  
  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
        .set_priority = gfx_v9_0_ring_set_priority_compute,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+       .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
  };
  
  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
        .emit_rreg = gfx_v9_0_ring_emit_rreg,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+       .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
  };
  
  static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@@@ -4686,7 -4789,6 +4789,7 @@@ static int gfx_v9_0_get_cu_info(struct 
  
        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
 +      cu_info->simd_per_cu = NUM_SIMD_PER_CU;
  
        return 0;
  }
index f22f7a88ce0fe4b7435f0316cb402b49e02993ea,839a144c1645e20066fe1095df962cfa23b44c7b..8dc29107228fd145ad27331a8cf3b0bda55be655
  #define               EOP_TC_WB_ACTION_EN                     (1 << 15) /* L2 */
  #define               EOP_TCL1_ACTION_EN                      (1 << 16)
  #define               EOP_TC_ACTION_EN                        (1 << 17) /* L2 */
+ #define               EOP_TC_NC_ACTION_EN                     (1 << 19)
  #define               EOP_TC_MD_ACTION_EN                     (1 << 21) /* L2 metadata */
  
  #define               DATA_SEL(x)                             ((x) << 29)
                         * x=1: tmz_end
                         */
  
 +#define       PACKET3_INVALIDATE_TLBS                         0x98
 +#              define PACKET3_INVALIDATE_TLBS_DST_SEL(x)     ((x) << 0)
 +#              define PACKET3_INVALIDATE_TLBS_ALL_HUB(x)     ((x) << 4)
 +#              define PACKET3_INVALIDATE_TLBS_PASID(x)       ((x) << 5)
 +#              define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x)  ((x) << 29)
  #define PACKET3_SET_RESOURCES                         0xA0
  /* 1. header
   * 2. CONTROL
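
The AMDGPU_FENCE_FLAG_TC_WB_ONLY path in gfx_v9_0_ring_emit_fence() (earlier in this series) selects a lighter EOP action set: only the TC writeback and TC_NC actions, instead of also flushing TCL1, TC and TC metadata. A small sketch of that mask selection, reusing the bit positions defined above:

/*
 * Sketch of the EOP cache-action selection; the bit positions match the
 * soc15d.h defines shown above.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EOP_TC_WB_ACTION_EN	(1u << 15)	/* L2 writeback */
#define EOP_TCL1_ACTION_EN	(1u << 16)
#define EOP_TC_ACTION_EN	(1u << 17)	/* L2 */
#define EOP_TC_NC_ACTION_EN	(1u << 19)
#define EOP_TC_MD_ACTION_EN	(1u << 21)	/* L2 metadata */

static uint32_t eop_cache_actions(bool writeback_only)
{
	if (writeback_only)
		return EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;

	return EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
	       EOP_TC_WB_ACTION_EN | EOP_TC_MD_ACTION_EN;
}

int main(void)
{
	printf("wb-only mask:    0x%08x\n", eop_cache_actions(true));
	printf("full-flush mask: 0x%08x\n", eop_cache_actions(false));
	return 0;
}
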
index 73fd48d6c756636e19f9dd70bedf2efa3614ebd3,8fd1b742985acad26dbf3fb747fe962acbb6e0a1..8fd1b742985acad26dbf3fb747fe962acbb6e0a1
mode 100644,100755..100644
@@@ -1081,6 -1081,7 +1081,7 @@@ static const struct amdgpu_ring_funcs v
        .end_use = amdgpu_vce_ring_end_use,
        .emit_wreg = vce_v4_0_emit_wreg,
        .emit_reg_wait = vce_v4_0_emit_reg_wait,
+       .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
  };
  
  static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
index 9cd3566def8d3cee902ee099d9fbad1260e0c9f1,e59357724eac1fcda676714875352c66458ecf92..644b2187507b9845e125214f9e941e9a124a33ef
@@@ -589,7 -589,7 +589,7 @@@ static void disable_dangling_plane(stru
   ******************************************************************************/
  
  struct dc *dc_create(const struct dc_init_data *init_params)
 - {
 +{
        struct dc *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
        unsigned int full_pipe_count;
  
@@@ -936,95 -936,6 +936,6 @@@ bool dc_post_update_surfaces_to_stream(
        return true;
  }
  
- /*
-  * TODO this whole function needs to go
-  *
-  * dc_surface_update is needlessly complex. See if we can just replace this
-  * with a dc_plane_state and follow the atomic model a bit more closely here.
-  */
- bool dc_commit_planes_to_stream(
-               struct dc *dc,
-               struct dc_plane_state **plane_states,
-               uint8_t new_plane_count,
-               struct dc_stream_state *dc_stream,
-               struct dc_state *state)
- {
-       /* no need to dynamically allocate this. it's pretty small */
-       struct dc_surface_update updates[MAX_SURFACES];
-       struct dc_flip_addrs *flip_addr;
-       struct dc_plane_info *plane_info;
-       struct dc_scaling_info *scaling_info;
-       int i;
-       struct dc_stream_update *stream_update =
-                       kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL);
-       if (!stream_update) {
-               BREAK_TO_DEBUGGER();
-               return false;
-       }
-       flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs),
-                           GFP_KERNEL);
-       plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info),
-                            GFP_KERNEL);
-       scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info),
-                              GFP_KERNEL);
-       if (!flip_addr || !plane_info || !scaling_info) {
-               kfree(flip_addr);
-               kfree(plane_info);
-               kfree(scaling_info);
-               kfree(stream_update);
-               return false;
-       }
-       memset(updates, 0, sizeof(updates));
-       stream_update->src = dc_stream->src;
-       stream_update->dst = dc_stream->dst;
-       stream_update->out_transfer_func = dc_stream->out_transfer_func;
-       for (i = 0; i < new_plane_count; i++) {
-               updates[i].surface = plane_states[i];
-               updates[i].gamma =
-                       (struct dc_gamma *)plane_states[i]->gamma_correction;
-               updates[i].in_transfer_func = plane_states[i]->in_transfer_func;
-               flip_addr[i].address = plane_states[i]->address;
-               flip_addr[i].flip_immediate = plane_states[i]->flip_immediate;
-               plane_info[i].color_space = plane_states[i]->color_space;
-               plane_info[i].input_tf = plane_states[i]->input_tf;
-               plane_info[i].format = plane_states[i]->format;
-               plane_info[i].plane_size = plane_states[i]->plane_size;
-               plane_info[i].rotation = plane_states[i]->rotation;
-               plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror;
-               plane_info[i].stereo_format = plane_states[i]->stereo_format;
-               plane_info[i].tiling_info = plane_states[i]->tiling_info;
-               plane_info[i].visible = plane_states[i]->visible;
-               plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha;
-               plane_info[i].dcc = plane_states[i]->dcc;
-               scaling_info[i].scaling_quality = plane_states[i]->scaling_quality;
-               scaling_info[i].src_rect = plane_states[i]->src_rect;
-               scaling_info[i].dst_rect = plane_states[i]->dst_rect;
-               scaling_info[i].clip_rect = plane_states[i]->clip_rect;
-               updates[i].flip_addr = &flip_addr[i];
-               updates[i].plane_info = &plane_info[i];
-               updates[i].scaling_info = &scaling_info[i];
-       }
-       dc_commit_updates_for_stream(
-                       dc,
-                       updates,
-                       new_plane_count,
-                       dc_stream, stream_update, plane_states, state);
-       kfree(flip_addr);
-       kfree(plane_info);
-       kfree(scaling_info);
-       kfree(stream_update);
-       return true;
- }
  struct dc_state *dc_create_state(void)
  {
        struct dc_state *context = kzalloc(sizeof(struct dc_state),
@@@ -1107,9 -1018,6 +1018,6 @@@ static enum surface_update_type get_pla
        if (u->plane_info->color_space != u->surface->color_space)
                update_flags->bits.color_space_change = 1;
  
-       if (u->plane_info->input_tf != u->surface->input_tf)
-               update_flags->bits.input_tf_change = 1;
        if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror)
                update_flags->bits.horizontal_mirror_change = 1;
  
@@@ -1243,12 -1151,20 +1151,20 @@@ static enum surface_update_type det_sur
        if (u->input_csc_color_matrix)
                update_flags->bits.input_csc_change = 1;
  
-       if (update_flags->bits.in_transfer_func_change
-                       || update_flags->bits.input_csc_change) {
+       if (u->coeff_reduction_factor)
+               update_flags->bits.coeff_reduction_change = 1;
+       if (update_flags->bits.in_transfer_func_change) {
                type = UPDATE_TYPE_MED;
                elevate_update_type(&overall_type, type);
        }
  
+       if (update_flags->bits.input_csc_change
+                       || update_flags->bits.coeff_reduction_change) {
+               type = UPDATE_TYPE_FULL;
+               elevate_update_type(&overall_type, type);
+       }
        return overall_type;
  }
  
@@@ -1297,7 -1213,7 +1213,7 @@@ enum surface_update_type dc_check_updat
        type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status);
        if (type == UPDATE_TYPE_FULL)
                for (i = 0; i < surface_count; i++)
-                       updates[i].surface->update_flags.bits.full_update = 1;
+                       updates[i].surface->update_flags.raw = 0xFFFFFFFF;
  
        return type;
  }
@@@ -1375,6 -1291,12 +1291,12 @@@ static void commit_planes_for_stream(st
                                        pipe_ctx->stream_res.abm->funcs->set_abm_level(
                                                        pipe_ctx->stream_res.abm, stream->abm_level);
                        }
+                       if (stream_update && stream_update->periodic_fn_vsync_delta &&
+                                       pipe_ctx->stream_res.tg->funcs->program_vline_interrupt)
+                               pipe_ctx->stream_res.tg->funcs->program_vline_interrupt(
+                                               pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing,
+                                               pipe_ctx->stream->periodic_fn_vsync_delta);
                }
        }
  
index 38e8041b5f0c738c7e958dce36adb22391dd660b,0000000000000000000000000000000000000000..cdb582043b4fc26cc63a64e04eadcd1bd559e752
mode 100644,000000..100644
--- /dev/null
@@@ -1,371 -1,0 +1,371 @@@
-                                     32, NULL);
 +// SPDX-License-Identifier: GPL-2.0+
 +/* Copyright (C) 2014-2018 Broadcom */
 +
 +/**
 + * DOC: Broadcom V3D Graphics Driver
 + *
 + * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs.
 + * For V3D 2.x support, see the VC4 driver.
 + *
 + * Currently only single-core rendering using the binner and renderer
 + * is supported.  The TFU (texture formatting unit) and V3D 4.x's CSD
 + * (compute shader dispatch) are not yet supported.
 + */
 +
 +#include <linux/clk.h>
 +#include <linux/device.h>
 +#include <linux/io.h>
 +#include <linux/module.h>
 +#include <linux/of_platform.h>
 +#include <linux/platform_device.h>
 +#include <linux/pm_runtime.h>
 +#include <drm/drm_fb_cma_helper.h>
 +#include <drm/drm_fb_helper.h>
 +
 +#include "uapi/drm/v3d_drm.h"
 +#include "v3d_drv.h"
 +#include "v3d_regs.h"
 +
 +#define DRIVER_NAME "v3d"
 +#define DRIVER_DESC "Broadcom V3D graphics"
 +#define DRIVER_DATE "20180419"
 +#define DRIVER_MAJOR 1
 +#define DRIVER_MINOR 0
 +#define DRIVER_PATCHLEVEL 0
 +
 +#ifdef CONFIG_PM
 +static int v3d_runtime_suspend(struct device *dev)
 +{
 +      struct drm_device *drm = dev_get_drvdata(dev);
 +      struct v3d_dev *v3d = to_v3d_dev(drm);
 +
 +      v3d_irq_disable(v3d);
 +
 +      clk_disable_unprepare(v3d->clk);
 +
 +      return 0;
 +}
 +
 +static int v3d_runtime_resume(struct device *dev)
 +{
 +      struct drm_device *drm = dev_get_drvdata(dev);
 +      struct v3d_dev *v3d = to_v3d_dev(drm);
 +      int ret;
 +
 +      ret = clk_prepare_enable(v3d->clk);
 +      if (ret != 0)
 +              return ret;
 +
 +      /* XXX: VPM base */
 +
 +      v3d_mmu_set_page_table(v3d);
 +      v3d_irq_enable(v3d);
 +
 +      return 0;
 +}
 +#endif
 +
 +static const struct dev_pm_ops v3d_v3d_pm_ops = {
 +      SET_RUNTIME_PM_OPS(v3d_runtime_suspend, v3d_runtime_resume, NULL)
 +};
 +
 +static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
 +                             struct drm_file *file_priv)
 +{
 +      struct v3d_dev *v3d = to_v3d_dev(dev);
 +      struct drm_v3d_get_param *args = data;
 +      int ret;
 +      static const u32 reg_map[] = {
 +              [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_UIFCFG,
 +              [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_IDENT1,
 +              [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_IDENT2,
 +              [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_IDENT3,
 +              [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_IDENT0,
 +              [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1,
 +              [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2,
 +      };
 +
 +      if (args->pad != 0)
 +              return -EINVAL;
 +
 +      /* Note that DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0, so we need
 +       * to explicitly allow it in the "the register in our
 +       * parameter map" check.
 +       */
 +      if (args->param < ARRAY_SIZE(reg_map) &&
 +          (reg_map[args->param] ||
 +           args->param == DRM_V3D_PARAM_V3D_CORE0_IDENT0)) {
 +              u32 offset = reg_map[args->param];
 +
 +              if (args->value != 0)
 +                      return -EINVAL;
 +
 +              ret = pm_runtime_get_sync(v3d->dev);
 +              if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 &&
 +                  args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) {
 +                      args->value = V3D_CORE_READ(0, offset);
 +              } else {
 +                      args->value = V3D_READ(offset);
 +              }
 +              pm_runtime_mark_last_busy(v3d->dev);
 +              pm_runtime_put_autosuspend(v3d->dev);
 +              return 0;
 +      }
 +
 +      /* Any params that aren't just register reads would go here. */
 +
 +      DRM_DEBUG("Unknown parameter %d\n", args->param);
 +      return -EINVAL;
 +}
 +
 +static int
 +v3d_open(struct drm_device *dev, struct drm_file *file)
 +{
 +      struct v3d_dev *v3d = to_v3d_dev(dev);
 +      struct v3d_file_priv *v3d_priv;
 +      int i;
 +
 +      v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
 +      if (!v3d_priv)
 +              return -ENOMEM;
 +
 +      v3d_priv->v3d = v3d;
 +
 +      for (i = 0; i < V3D_MAX_QUEUES; i++) {
 +              drm_sched_entity_init(&v3d->queue[i].sched,
 +                                    &v3d_priv->sched_entity[i],
 +                                    &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
++                                    NULL);
 +      }
 +
 +      file->driver_priv = v3d_priv;
 +
 +      return 0;
 +}
 +
 +static void
 +v3d_postclose(struct drm_device *dev, struct drm_file *file)
 +{
 +      struct v3d_dev *v3d = to_v3d_dev(dev);
 +      struct v3d_file_priv *v3d_priv = file->driver_priv;
 +      enum v3d_queue q;
 +
 +      for (q = 0; q < V3D_MAX_QUEUES; q++) {
 +              drm_sched_entity_fini(&v3d->queue[q].sched,
 +                                    &v3d_priv->sched_entity[q]);
 +      }
 +
 +      kfree(v3d_priv);
 +}
 +
 +static const struct file_operations v3d_drm_fops = {
 +      .owner = THIS_MODULE,
 +      .open = drm_open,
 +      .release = drm_release,
 +      .unlocked_ioctl = drm_ioctl,
 +      .mmap = v3d_mmap,
 +      .poll = drm_poll,
 +      .read = drm_read,
 +      .compat_ioctl = drm_compat_ioctl,
 +      .llseek = noop_llseek,
 +};
 +
 +/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
 + * protection between clients.  Note that render nodes would be
 + * able to submit CLs that could access BOs from clients authenticated
 + * with the master node.
 + */
 +static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
 +      DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
 +      DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl, DRM_RENDER_ALLOW),
 +      DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl, DRM_RENDER_ALLOW),
 +      DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW),
 +      DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
 +      DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
 +};
 +
 +static const struct vm_operations_struct v3d_vm_ops = {
 +      .fault = v3d_gem_fault,
 +      .open = drm_gem_vm_open,
 +      .close = drm_gem_vm_close,
 +};
 +
 +static struct drm_driver v3d_drm_driver = {
 +      .driver_features = (DRIVER_GEM |
 +                          DRIVER_RENDER |
 +                          DRIVER_PRIME |
 +                          DRIVER_SYNCOBJ),
 +
 +      .open = v3d_open,
 +      .postclose = v3d_postclose,
 +
 +#if defined(CONFIG_DEBUG_FS)
 +      .debugfs_init = v3d_debugfs_init,
 +#endif
 +
 +      .gem_free_object_unlocked = v3d_free_object,
 +      .gem_vm_ops = &v3d_vm_ops,
 +
 +      .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 +      .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 +      .gem_prime_import = drm_gem_prime_import,
 +      .gem_prime_export = drm_gem_prime_export,
 +      .gem_prime_res_obj = v3d_prime_res_obj,
 +      .gem_prime_get_sg_table = v3d_prime_get_sg_table,
 +      .gem_prime_import_sg_table = v3d_prime_import_sg_table,
 +      .gem_prime_mmap = v3d_prime_mmap,
 +
 +      .ioctls = v3d_drm_ioctls,
 +      .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls),
 +      .fops = &v3d_drm_fops,
 +
 +      .name = DRIVER_NAME,
 +      .desc = DRIVER_DESC,
 +      .date = DRIVER_DATE,
 +      .major = DRIVER_MAJOR,
 +      .minor = DRIVER_MINOR,
 +      .patchlevel = DRIVER_PATCHLEVEL,
 +};
 +
 +static const struct of_device_id v3d_of_match[] = {
 +      { .compatible = "brcm,7268-v3d" },
 +      { .compatible = "brcm,7278-v3d" },
 +      {},
 +};
 +MODULE_DEVICE_TABLE(of, v3d_of_match);
 +
 +static int
 +map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name)
 +{
 +      struct resource *res =
 +              platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name);
 +
 +      *regs = devm_ioremap_resource(v3d->dev, res);
 +      return PTR_ERR_OR_ZERO(*regs);
 +}
 +
 +static int v3d_platform_drm_probe(struct platform_device *pdev)
 +{
 +      struct device *dev = &pdev->dev;
 +      struct drm_device *drm;
 +      struct v3d_dev *v3d;
 +      int ret;
 +      u32 ident1;
 +
 +      dev->coherent_dma_mask = DMA_BIT_MASK(36);
 +
 +      v3d = kzalloc(sizeof(*v3d), GFP_KERNEL);
 +      if (!v3d)
 +              return -ENOMEM;
 +      v3d->dev = dev;
 +      v3d->pdev = pdev;
 +      drm = &v3d->drm;
 +
 +      ret = map_regs(v3d, &v3d->bridge_regs, "bridge");
 +      if (ret)
 +              goto dev_free;
 +
 +      ret = map_regs(v3d, &v3d->hub_regs, "hub");
 +      if (ret)
 +              goto dev_free;
 +
 +      ret = map_regs(v3d, &v3d->core_regs[0], "core0");
 +      if (ret)
 +              goto dev_free;
 +
 +      ident1 = V3D_READ(V3D_HUB_IDENT1);
 +      v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 +
 +                  V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV));
 +      v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES);
 +      WARN_ON(v3d->cores > 1); /* multicore not yet implemented */
 +
 +      if (v3d->ver < 41) {
 +              ret = map_regs(v3d, &v3d->gca_regs, "gca");
 +              if (ret)
 +                      goto dev_free;
 +      }
 +
 +      v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr,
 +                                      GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
 +      if (!v3d->mmu_scratch) {
 +              dev_err(dev, "Failed to allocate MMU scratch page\n");
 +              ret = -ENOMEM;
 +              goto dev_free;
 +      }
 +
 +      pm_runtime_use_autosuspend(dev);
 +      pm_runtime_set_autosuspend_delay(dev, 50);
 +      pm_runtime_enable(dev);
 +
 +      ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev);
 +      if (ret)
 +              goto dma_free;
 +
 +      platform_set_drvdata(pdev, drm);
 +      drm->dev_private = v3d;
 +
 +      ret = v3d_gem_init(drm);
 +      if (ret)
 +              goto dev_destroy;
 +
 +      v3d_irq_init(v3d);
 +
 +      ret = drm_dev_register(drm, 0);
 +      if (ret)
 +              goto gem_destroy;
 +
 +      return 0;
 +
 +gem_destroy:
 +      v3d_gem_destroy(drm);
 +dev_destroy:
 +      drm_dev_put(drm);
 +dma_free:
 +      dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
 +dev_free:
 +      kfree(v3d);
 +      return ret;
 +}
 +
 +static int v3d_platform_drm_remove(struct platform_device *pdev)
 +{
 +      struct drm_device *drm = platform_get_drvdata(pdev);
 +      struct v3d_dev *v3d = to_v3d_dev(drm);
 +
 +      drm_dev_unregister(drm);
 +
 +      v3d_gem_destroy(drm);
 +
 +      drm_dev_put(drm);
 +
 +      dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
 +
 +      return 0;
 +}
 +
 +static struct platform_driver v3d_platform_driver = {
 +      .probe          = v3d_platform_drm_probe,
 +      .remove         = v3d_platform_drm_remove,
 +      .driver         = {
 +              .name   = "v3d",
 +              .of_match_table = v3d_of_match,
 +      },
 +};
 +
 +static int __init v3d_drm_register(void)
 +{
 +      return platform_driver_register(&v3d_platform_driver);
 +}
 +
 +static void __exit v3d_drm_unregister(void)
 +{
 +      platform_driver_unregister(&v3d_platform_driver);
 +}
 +
 +module_init(v3d_drm_register);
 +module_exit(v3d_drm_unregister);
 +
 +MODULE_ALIAS("platform:v3d-drm");
 +MODULE_DESCRIPTION("Broadcom V3D DRM Driver");
 +MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
 +MODULE_LICENSE("GPL v2");
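
A note on v3d_get_param_ioctl() above: it uses a sparse, designated-initializer table indexed by parameter, and because DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0 its map entry is also 0, so the "is this parameter mapped" test must allow it explicitly. A small user-space sketch of that lookup pattern (parameter names and offsets here are placeholders, not the V3D register map):

/*
 * Sketch of a sparse param-to-register table where a valid entry may be 0
 * and therefore needs an explicit allow-list check.
 */
#include <stdint.h>
#include <stdio.h>

enum fake_param {
	PARAM_IDENT0,		/* maps to register offset 0 */
	PARAM_IDENT1,
	PARAM_UNMAPPED,		/* intentionally absent from the table */
	PARAM_COUNT,
};

static const uint32_t reg_map[] = {
	[PARAM_IDENT0] = 0x0000,
	[PARAM_IDENT1] = 0x0004,
};

/* Returns 0 and fills *offset on success, -1 for unknown params. */
static int lookup_param(unsigned int param, uint32_t *offset)
{
	if (param < sizeof(reg_map) / sizeof(reg_map[0]) &&
	    (reg_map[param] || param == PARAM_IDENT0)) {
		*offset = reg_map[param];
		return 0;
	}
	return -1;
}

int main(void)
{
	uint32_t off;
	unsigned int p;

	for (p = 0; p < PARAM_COUNT; p++) {
		if (lookup_param(p, &off) == 0)
			printf("param %u -> reg 0x%04x\n", p, off);
		else
			printf("param %u -> unknown\n", p);
	}
	return 0;
}
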