Merge branch 'drm-next-4.18' of git://people.freedesktop.org/~agd5f/linux into drm...
author Dave Airlie <airlied@redhat.com>
Tue, 15 May 2018 22:21:51 +0000 (08:21 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 15 May 2018 22:31:29 +0000 (08:31 +1000)
Main changes for 4.18.  I'd like to do a separate pull for vega20 later
this week or next.  Highlights:
- Reserve pre-OS scanout buffer during init for seamless transition from
  console to driver
- VEGAM support
- Improved GPU scheduler documentation
- Initial gfxoff support for raven
- SR-IOV fixes
- Default to non-AGP on PowerPC for radeon
- Fine grained clock voltage control for vega10
- Power profiles for vega10
- Further clean up of powerplay/driver interface
- Underlay fixes
- Display link bw updates
- Gamma fixes
- Scatter/Gather display support on CZ/ST
- Misc bug fixes and clean ups

[airlied: fixup v3d vs scheduler API change]

Link: https://patchwork.freedesktop.org/patch/msgid/20180515185450.1113-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
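
A recurring change across the amdgpu hunks below (alloc_gtt_mem, amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, amdgpu_ttm_fw_reserve_vram_init) is the switch of amdgpu_bo_create() from a long positional argument list to a struct amdgpu_bo_param. A minimal sketch of the new calling convention, built only from the fields visible in those hunks (the helper name is illustrative, not part of the merge):

    /* Illustrative only: allocate a kernel BO in GTT using the new
     * amdgpu_bo_param convention shown in the hunks below.
     */
    static int example_alloc_gtt_bo(struct amdgpu_device *adev, unsigned long size,
                                    struct amdgpu_bo **bo)
    {
            struct amdgpu_bo_param bp;

            memset(&bp, 0, sizeof(bp));
            bp.size = size;                            /* allocation size in bytes */
            bp.byte_align = PAGE_SIZE;                 /* CPU page alignment */
            bp.domain = AMDGPU_GEM_DOMAIN_GTT;         /* place the BO in GTT */
            bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; /* write-combined CPU mapping */
            bp.type = ttm_bo_type_kernel;
            bp.resv = NULL;                            /* no shared reservation object */

            return amdgpu_bo_create(adev, &bp, bo);    /* new three-argument form */
    }

Callers that previously passed size, alignment, domain, flags, type and resv positionally now fill the parameter struct once and pass it by pointer, which is why each converted hunk gains the memset()/field-assignment block.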
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/soc15d.h
drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/v3d/v3d_drv.c

index f3002020df6ccc380b548aa2d9a920cf2a9c710d,2fe4a0bf98c845e08e884f3d1c831fa3591f5f16..68e9f584c570df45eb7973bcd77f64b35b0336c6
@@@ -64,6 -64,10 +64,10 @@@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o g
  amdgpu-y += \
        vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
  
+ # add DF block
+ amdgpu-y += \
+       df_v1_7.o
  # add GMC block
  amdgpu-y += \
        gmc_v7_0.o \
@@@ -130,8 -134,7 +134,8 @@@ amdgpu-y += 
         amdgpu_amdkfd.o \
         amdgpu_amdkfd_fence.o \
         amdgpu_amdkfd_gpuvm.o \
 -       amdgpu_amdkfd_gfx_v8.o
 +       amdgpu_amdkfd_gfx_v8.o \
 +       amdgpu_amdkfd_gfx_v9.o
  
  # add cgs
  amdgpu-y += amdgpu_cgs.o
index cd0e8f192e6a52f20f8a11d7aea656c736ce8029,887702c59488434cab0e1e36b6393ba4dc146560..bd36ee9f7e6d7ed012dd63b6973fdbb112b70998
@@@ -92,10 -92,6 +92,10 @@@ void amdgpu_amdkfd_device_probe(struct 
        case CHIP_POLARIS11:
                kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
                break;
 +      case CHIP_VEGA10:
 +      case CHIP_RAVEN:
 +              kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
 +              break;
        default:
                dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
                return;
@@@ -179,28 -175,6 +179,28 @@@ void amdgpu_amdkfd_device_init(struct a
                                &gpu_resources.doorbell_physical_address,
                                &gpu_resources.doorbell_aperture_size,
                                &gpu_resources.doorbell_start_offset);
 +              if (adev->asic_type >= CHIP_VEGA10) {
 +                      /* On SOC15 the BIF is involved in routing
 +                       * doorbells using the low 12 bits of the
 +                       * address. Communicate the assignments to
 +                       * KFD. KFD uses two doorbell pages per
 +                       * process in case of 64-bit doorbells so we
 +                       * can use each doorbell assignment twice.
 +                       */
 +                      gpu_resources.sdma_doorbell[0][0] =
 +                              AMDGPU_DOORBELL64_sDMA_ENGINE0;
 +                      gpu_resources.sdma_doorbell[0][1] =
 +                              AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
 +                      gpu_resources.sdma_doorbell[1][0] =
 +                              AMDGPU_DOORBELL64_sDMA_ENGINE1;
 +                      gpu_resources.sdma_doorbell[1][1] =
 +                              AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
 +                      /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
 +                       * SDMA, IH and VCN. So don't use them for the CP.
 +                       */
 +                      gpu_resources.reserved_doorbell_mask = 0x1f0;
 +                      gpu_resources.reserved_doorbell_val  = 0x0f0;
 +              }
  
                kgd2kfd->device_init(adev->kfd, &gpu_resources);
        }
@@@ -243,13 -217,19 +243,19 @@@ int alloc_gtt_mem(struct kgd_dev *kgd, 
  {
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
        struct amdgpu_bo *bo = NULL;
+       struct amdgpu_bo_param bp;
        int r;
        uint64_t gpu_addr_tmp = 0;
        void *cpu_ptr_tmp = NULL;
  
-       r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
-                            AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
-                            NULL, &bo);
+       memset(&bp, 0, sizeof(bp));
+       bp.size = size;
+       bp.byte_align = PAGE_SIZE;
+       bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+       bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+       bp.type = ttm_bo_type_kernel;
+       bp.resv = NULL;
+       r = amdgpu_bo_create(adev, &bp, &bo);
        if (r) {
                dev_err(adev->dev,
                        "failed to allocate BO for amdkfd (%d)\n", r);
index 5296e24fd6620567d995a4b4d6d7651ce0e5907f,c1b0cdb401dcc8ed12b5d8af5512045dd37bcae8..72ab2b1ffe7510ef26b034a60addf51cd123f6fd
@@@ -23,7 -23,6 +23,7 @@@
  #define pr_fmt(fmt) "kfd2kgd: " fmt
  
  #include <linux/list.h>
 +#include <linux/sched/mm.h>
  #include <drm/drmP.h>
  #include "amdgpu_object.h"
  #include "amdgpu_vm.h"
   */
  #define VI_BO_SIZE_ALIGN (0x8000)
  
 +/* BO flag to indicate a KFD userptr BO */
 +#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
 +
 +/* Userptr restore delay, just long enough to allow consecutive VM
 + * changes to accumulate
 + */
 +#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
 +
  /* Impose limit on how much memory KFD can use */
  static struct {
        uint64_t max_system_mem_limit;
 +      uint64_t max_userptr_mem_limit;
        int64_t system_mem_used;
 +      int64_t userptr_mem_used;
        spinlock_t mem_limit_lock;
  } kfd_mem_limit;
  
@@@ -68,7 -57,6 +68,7 @@@ static const char * const domain_bit_to
  
  #define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
  
 +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
  
  
  static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@@ -90,7 -78,6 +90,7 @@@ static bool check_if_add_bo_to_vm(struc
  
   /* Set memory usage limits. Currently, limits are
   *  System (kernel) memory - 3/8th System RAM
 + *  Userptr memory - 3/4th System RAM
   */
  void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
  {
  
        spin_lock_init(&kfd_mem_limit.mem_limit_lock);
        kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
 -      pr_debug("Kernel memory limit %lluM\n",
 -              (kfd_mem_limit.max_system_mem_limit >> 20));
 +      kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
 +      pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
 +              (kfd_mem_limit.max_system_mem_limit >> 20),
 +              (kfd_mem_limit.max_userptr_mem_limit >> 20));
  }
  
  static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
                        goto err_no_mem;
                }
                kfd_mem_limit.system_mem_used += (acc_size + size);
 +      } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
 +              if ((kfd_mem_limit.system_mem_used + acc_size >
 +                      kfd_mem_limit.max_system_mem_limit) ||
 +                      (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
 +                      kfd_mem_limit.max_userptr_mem_limit)) {
 +                      ret = -ENOMEM;
 +                      goto err_no_mem;
 +              }
 +              kfd_mem_limit.system_mem_used += acc_size;
 +              kfd_mem_limit.userptr_mem_used += size;
        }
  err_no_mem:
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@@ -151,16 -126,10 +151,16 @@@ static void unreserve_system_mem_limit(
                                       sizeof(struct amdgpu_bo));
  
        spin_lock(&kfd_mem_limit.mem_limit_lock);
 -      if (domain == AMDGPU_GEM_DOMAIN_GTT)
 +      if (domain == AMDGPU_GEM_DOMAIN_GTT) {
                kfd_mem_limit.system_mem_used -= (acc_size + size);
 +      } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
 +              kfd_mem_limit.system_mem_used -= acc_size;
 +              kfd_mem_limit.userptr_mem_used -= size;
 +      }
        WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
                  "kfd system memory accounting unbalanced");
 +      WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
 +                "kfd userptr memory accounting unbalanced");
  
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
  }
@@@ -169,17 -138,12 +169,17 @@@ void amdgpu_amdkfd_unreserve_system_mem
  {
        spin_lock(&kfd_mem_limit.mem_limit_lock);
  
 -      if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
 +      if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
 +              kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
 +              kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
 +      } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
                kfd_mem_limit.system_mem_used -=
                        (bo->tbo.acc_size + amdgpu_bo_size(bo));
        }
        WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
                  "kfd system memory accounting unbalanced");
 +      WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
 +                "kfd userptr memory accounting unbalanced");
  
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
  }
@@@ -542,8 -506,7 +542,8 @@@ static void remove_bo_from_vm(struct am
  }
  
  static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
 -                              struct amdkfd_process_info *process_info)
 +                              struct amdkfd_process_info *process_info,
 +                              bool userptr)
  {
        struct ttm_validate_buffer *entry = &mem->validate_list;
        struct amdgpu_bo *bo = mem->bo;
        entry->shared = true;
        entry->bo = &bo->tbo;
        mutex_lock(&process_info->lock);
 -      list_add_tail(&entry->head, &process_info->kfd_bo_list);
 +      if (userptr)
 +              list_add_tail(&entry->head, &process_info->userptr_valid_list);
 +      else
 +              list_add_tail(&entry->head, &process_info->kfd_bo_list);
        mutex_unlock(&process_info->lock);
  }
  
 +/* Initializes user pages. It registers the MMU notifier and validates
 + * the userptr BO in the GTT domain.
 + *
 + * The BO must already be on the userptr_valid_list. Otherwise an
 + * eviction and restore may happen that leaves the new BO unmapped
 + * with the user mode queues running.
 + *
 + * Takes the process_info->lock to protect against concurrent restore
 + * workers.
 + *
 + * Returns 0 for success, negative errno for errors.
 + */
 +static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 +                         uint64_t user_addr)
 +{
 +      struct amdkfd_process_info *process_info = mem->process_info;
 +      struct amdgpu_bo *bo = mem->bo;
 +      struct ttm_operation_ctx ctx = { true, false };
 +      int ret = 0;
 +
 +      mutex_lock(&process_info->lock);
 +
 +      ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
 +      if (ret) {
 +              pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
 +              goto out;
 +      }
 +
 +      ret = amdgpu_mn_register(bo, user_addr);
 +      if (ret) {
 +              pr_err("%s: Failed to register MMU notifier: %d\n",
 +                     __func__, ret);
 +              goto out;
 +      }
 +
 +      /* If no restore worker is running concurrently, user_pages
 +       * should not be allocated
 +       */
 +      WARN(mem->user_pages, "Leaking user_pages array");
 +
 +      mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
 +                                         sizeof(struct page *),
 +                                         GFP_KERNEL | __GFP_ZERO);
 +      if (!mem->user_pages) {
 +              pr_err("%s: Failed to allocate pages array\n", __func__);
 +              ret = -ENOMEM;
 +              goto unregister_out;
 +      }
 +
 +      ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
 +      if (ret) {
 +              pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
 +              goto free_out;
 +      }
 +
 +      amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
 +
 +      ret = amdgpu_bo_reserve(bo, true);
 +      if (ret) {
 +              pr_err("%s: Failed to reserve BO\n", __func__);
 +              goto release_out;
 +      }
 +      amdgpu_ttm_placement_from_domain(bo, mem->domain);
 +      ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 +      if (ret)
 +              pr_err("%s: failed to validate BO\n", __func__);
 +      amdgpu_bo_unreserve(bo);
 +
 +release_out:
 +      if (ret)
 +              release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
 +free_out:
 +      kvfree(mem->user_pages);
 +      mem->user_pages = NULL;
 +unregister_out:
 +      if (ret)
 +              amdgpu_mn_unregister(bo);
 +out:
 +      mutex_unlock(&process_info->lock);
 +      return ret;
 +}
 +
  /* Reserving a BO and its page table BOs must happen atomically to
   * avoid deadlocks. Some operations update multiple VMs at once. Track
   * all the reservation info in a context structure. Optionally a sync
@@@ -870,8 -748,7 +870,8 @@@ static int update_gpuvm_pte(struct amdg
  }
  
  static int map_bo_to_gpuvm(struct amdgpu_device *adev,
 -              struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
 +              struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
 +              bool no_update_pte)
  {
        int ret;
  
                return ret;
        }
  
 +      if (no_update_pte)
 +              return 0;
 +
        ret = update_gpuvm_pte(adev, entry, sync);
        if (ret) {
                pr_err("update_gpuvm_pte() failed\n");
@@@ -946,8 -820,6 +946,8 @@@ static int init_kfd_vm(struct amdgpu_v
                mutex_init(&info->lock);
                INIT_LIST_HEAD(&info->vm_list_head);
                INIT_LIST_HEAD(&info->kfd_bo_list);
 +              INIT_LIST_HEAD(&info->userptr_valid_list);
 +              INIT_LIST_HEAD(&info->userptr_inval_list);
  
                info->eviction_fence =
                        amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
                        goto create_evict_fence_fail;
                }
  
 +              info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
 +              atomic_set(&info->evicted_bos, 0);
 +              INIT_DELAYED_WORK(&info->restore_userptr_work,
 +                                amdgpu_amdkfd_restore_userptr_worker);
 +
                *process_info = info;
                *ef = dma_fence_get(&info->eviction_fence->base);
        }
@@@ -1005,7 -872,6 +1005,7 @@@ reserve_pd_fail
                dma_fence_put(*ef);
                *ef = NULL;
                *process_info = NULL;
 +              put_pid(info->pid);
  create_evict_fence_fail:
                mutex_destroy(&info->lock);
                kfree(info);
@@@ -1101,12 -967,8 +1101,12 @@@ void amdgpu_amdkfd_gpuvm_destroy_cb(str
        /* Release per-process resources when last compute VM is destroyed */
        if (!process_info->n_vms) {
                WARN_ON(!list_empty(&process_info->kfd_bo_list));
 +              WARN_ON(!list_empty(&process_info->userptr_valid_list));
 +              WARN_ON(!list_empty(&process_info->userptr_inval_list));
  
                dma_fence_put(&process_info->eviction_fence->base);
 +              cancel_delayed_work_sync(&process_info->restore_userptr_work);
 +              put_pid(process_info->pid);
                mutex_destroy(&process_info->lock);
                kfree(process_info);
        }
@@@ -1141,10 -1003,10 +1141,11 @@@ int amdgpu_amdkfd_gpuvm_alloc_memory_of
  {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 +      uint64_t user_addr = 0;
        struct amdgpu_bo *bo;
+       struct amdgpu_bo_param bp;
        int byte_align;
 -      u32 alloc_domain;
 +      u32 domain, alloc_domain;
        u64 alloc_flags;
        uint32_t mapping_flags;
        int ret;
         * Check on which domain to allocate BO
         */
        if (flags & ALLOC_MEM_FLAGS_VRAM) {
 -              alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
 +              domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
                alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
                alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
                        AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
        } else if (flags & ALLOC_MEM_FLAGS_GTT) {
 -              alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 +              domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
                alloc_flags = 0;
 +      } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
 +              domain = AMDGPU_GEM_DOMAIN_GTT;
 +              alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
 +              alloc_flags = 0;
 +              if (!offset || !*offset)
 +                      return -EINVAL;
 +              user_addr = *offset;
        } else {
                return -EINVAL;
        }
        pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
                        va, size, domain_string(alloc_domain));
  
-       ret = amdgpu_bo_create(adev, size, byte_align,
-                               alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
+       memset(&bp, 0, sizeof(bp));
+       bp.size = size;
+       bp.byte_align = byte_align;
+       bp.domain = alloc_domain;
+       bp.flags = alloc_flags;
+       bp.type = ttm_bo_type_device;
+       bp.resv = NULL;
+       ret = amdgpu_bo_create(adev, &bp, &bo);
        if (ret) {
                pr_debug("Failed to create BO on domain %s. ret %d\n",
                                domain_string(alloc_domain), ret);
        }
        bo->kfd_bo = *mem;
        (*mem)->bo = bo;
 +      if (user_addr)
 +              bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
  
        (*mem)->va = va;
 -      (*mem)->domain = alloc_domain;
 +      (*mem)->domain = domain;
        (*mem)->mapped_to_gpu_memory = 0;
        (*mem)->process_info = avm->process_info;
 -      add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
 +      add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 +
 +      if (user_addr) {
 +              ret = init_user_pages(*mem, current->mm, user_addr);
 +              if (ret) {
 +                      mutex_lock(&avm->process_info->lock);
 +                      list_del(&(*mem)->validate_list.head);
 +                      mutex_unlock(&avm->process_info->lock);
 +                      goto allocate_init_user_pages_failed;
 +              }
 +      }
  
        if (offset)
                *offset = amdgpu_bo_mmap_offset(bo);
  
        return 0;
  
 +allocate_init_user_pages_failed:
 +      amdgpu_bo_unref(&bo);
 +      /* Don't unreserve system mem limit twice */
 +      goto err_reserve_system_mem;
  err_bo_create:
        unreserve_system_mem_limit(adev, size, alloc_domain);
  err_reserve_system_mem:
@@@ -1284,24 -1129,12 +1291,24 @@@ int amdgpu_amdkfd_gpuvm_free_memory_of_
         * be freed anyway
         */
  
 +      /* No more MMU notifiers */
 +      amdgpu_mn_unregister(mem->bo);
 +
        /* Make sure restore workers don't access the BO any more */
        bo_list_entry = &mem->validate_list;
        mutex_lock(&process_info->lock);
        list_del(&bo_list_entry->head);
        mutex_unlock(&process_info->lock);
  
 +      /* Free user pages if necessary */
 +      if (mem->user_pages) {
 +              pr_debug("%s: Freeing user_pages array\n", __func__);
 +              if (mem->user_pages[0])
 +                      release_pages(mem->user_pages,
 +                                      mem->bo->tbo.ttm->num_pages);
 +              kvfree(mem->user_pages);
 +      }
 +
        ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
        if (unlikely(ret))
                return ret;
@@@ -1347,32 -1180,21 +1354,32 @@@ int amdgpu_amdkfd_gpuvm_map_memory_to_g
        struct kfd_bo_va_list *bo_va_entry = NULL;
        struct kfd_bo_va_list *bo_va_entry_aql = NULL;
        unsigned long bo_size;
 -
 -      /* Make sure restore is not running concurrently.
 -       */
 -      mutex_lock(&mem->process_info->lock);
 -
 -      mutex_lock(&mem->lock);
 +      bool is_invalid_userptr = false;
  
        bo = mem->bo;
 -
        if (!bo) {
                pr_err("Invalid BO when mapping memory to GPU\n");
 -              ret = -EINVAL;
 -              goto out;
 +              return -EINVAL;
 +      }
 +
 +      /* Make sure restore is not running concurrently. Since we
 +       * don't map invalid userptr BOs, we rely on the next restore
 +       * worker to do the mapping
 +       */
 +      mutex_lock(&mem->process_info->lock);
 +
 +      /* Lock mmap-sem. If we find an invalid userptr BO, we can be
 +       * sure that the MMU notifier is no longer running
 +       * concurrently and the queues are actually stopped
 +       */
 +      if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
 +              down_write(&current->mm->mmap_sem);
 +              is_invalid_userptr = atomic_read(&mem->invalid);
 +              up_write(&current->mm->mmap_sem);
        }
  
 +      mutex_lock(&mem->lock);
 +
        domain = mem->domain;
        bo_size = bo->tbo.mem.size;
  
        if (unlikely(ret))
                goto out;
  
 +      /* Userptr can be marked as "not invalid", but not actually be
 +       * validated yet (still in the system domain). In that case
 +       * the queues are still stopped and we can leave mapping for
 +       * the next restore worker
 +       */
 +      if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
 +              is_invalid_userptr = true;
 +
        if (check_if_add_bo_to_vm(avm, mem)) {
                ret = add_bo_to_vm(adev, mem, avm, false,
                                &bo_va_entry);
                        goto add_bo_to_vm_failed;
        }
  
 -      if (mem->mapped_to_gpu_memory == 0) {
 +      if (mem->mapped_to_gpu_memory == 0 &&
 +          !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
                /* Validate BO only once. The eviction fence gets added to BO
                 * the first time it is mapped. Validate will wait for all
                 * background evictions to complete.
                                        entry->va, entry->va + bo_size,
                                        entry);
  
 -                      ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
 +                      ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
 +                                            is_invalid_userptr);
                        if (ret) {
                                pr_err("Failed to map radeon bo to gpuvm\n");
                                goto map_bo_to_gpuvm_failed;
@@@ -1613,337 -1425,6 +1620,337 @@@ bo_reserve_failed
        return ret;
  }
  
 +/* Evict a userptr BO by stopping the queues if necessary
 + *
 + * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
 + * cannot do any memory allocations, and cannot take any locks that
 + * are held elsewhere while allocating memory. Therefore this is as
 + * simple as possible, using atomic counters.
 + *
 + * It doesn't do anything to the BO itself. The real work happens in
 + * restore, where we get updated page addresses. This function only
 + * ensures that GPU access to the BO is stopped.
 + */
 +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
 +                              struct mm_struct *mm)
 +{
 +      struct amdkfd_process_info *process_info = mem->process_info;
 +      int invalid, evicted_bos;
 +      int r = 0;
 +
 +      invalid = atomic_inc_return(&mem->invalid);
 +      evicted_bos = atomic_inc_return(&process_info->evicted_bos);
 +      if (evicted_bos == 1) {
 +              /* First eviction, stop the queues */
 +              r = kgd2kfd->quiesce_mm(mm);
 +              if (r)
 +                      pr_err("Failed to quiesce KFD\n");
 +              schedule_delayed_work(&process_info->restore_userptr_work,
 +                      msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 +      }
 +
 +      return r;
 +}
 +
 +/* Update invalid userptr BOs
 + *
 + * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
 + * userptr_inval_list and updates user pages for all BOs that have
 + * been invalidated since their last update.
 + */
 +static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 +                                   struct mm_struct *mm)
 +{
 +      struct kgd_mem *mem, *tmp_mem;
 +      struct amdgpu_bo *bo;
 +      struct ttm_operation_ctx ctx = { false, false };
 +      int invalid, ret;
 +
 +      /* Move all invalidated BOs to the userptr_inval_list and
 +       * release their user pages by migration to the CPU domain
 +       */
 +      list_for_each_entry_safe(mem, tmp_mem,
 +                               &process_info->userptr_valid_list,
 +                               validate_list.head) {
 +              if (!atomic_read(&mem->invalid))
 +                      continue; /* BO is still valid */
 +
 +              bo = mem->bo;
 +
 +              if (amdgpu_bo_reserve(bo, true))
 +                      return -EAGAIN;
 +              amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
 +              ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 +              amdgpu_bo_unreserve(bo);
 +              if (ret) {
 +                      pr_err("%s: Failed to invalidate userptr BO\n",
 +                             __func__);
 +                      return -EAGAIN;
 +              }
 +
 +              list_move_tail(&mem->validate_list.head,
 +                             &process_info->userptr_inval_list);
 +      }
 +
 +      if (list_empty(&process_info->userptr_inval_list))
 +              return 0; /* All evicted userptr BOs were freed */
 +
 +      /* Go through userptr_inval_list and update any invalid user_pages */
 +      list_for_each_entry(mem, &process_info->userptr_inval_list,
 +                          validate_list.head) {
 +              invalid = atomic_read(&mem->invalid);
 +              if (!invalid)
 +                      /* BO hasn't been invalidated since the last
 +                       * revalidation attempt. Keep its BO list.
 +                       */
 +                      continue;
 +
 +              bo = mem->bo;
 +
 +              if (!mem->user_pages) {
 +                      mem->user_pages =
 +                              kvmalloc_array(bo->tbo.ttm->num_pages,
 +                                               sizeof(struct page *),
 +                                               GFP_KERNEL | __GFP_ZERO);
 +                      if (!mem->user_pages) {
 +                              pr_err("%s: Failed to allocate pages array\n",
 +                                     __func__);
 +                              return -ENOMEM;
 +                      }
 +              } else if (mem->user_pages[0]) {
 +                      release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
 +              }
 +
 +              /* Get updated user pages */
 +              ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
 +                                                 mem->user_pages);
 +              if (ret) {
 +                      mem->user_pages[0] = NULL;
 +                      pr_info("%s: Failed to get user pages: %d\n",
 +                              __func__, ret);
 +                      /* Pretend it succeeded. It will fail later
 +                       * with a VM fault if the GPU tries to access
 +                       * it. Better than hanging indefinitely with
 +                       * stalled user mode queues.
 +                       */
 +              }
 +
 +              /* Mark the BO as valid unless it was invalidated
 +               * again concurrently
 +               */
 +              if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
 +                      return -EAGAIN;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Validate invalid userptr BOs
 + *
 + * Validates BOs on the userptr_inval_list, and moves them back to the
 + * userptr_valid_list. Also updates GPUVM page tables with new page
 + * addresses and waits for the page table updates to complete.
 + */
 +static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 +{
 +      struct amdgpu_bo_list_entry *pd_bo_list_entries;
 +      struct list_head resv_list, duplicates;
 +      struct ww_acquire_ctx ticket;
 +      struct amdgpu_sync sync;
 +
 +      struct amdgpu_vm *peer_vm;
 +      struct kgd_mem *mem, *tmp_mem;
 +      struct amdgpu_bo *bo;
 +      struct ttm_operation_ctx ctx = { false, false };
 +      int i, ret;
 +
 +      pd_bo_list_entries = kcalloc(process_info->n_vms,
 +                                   sizeof(struct amdgpu_bo_list_entry),
 +                                   GFP_KERNEL);
 +      if (!pd_bo_list_entries) {
 +              pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
 +              return -ENOMEM;
 +      }
 +
 +      INIT_LIST_HEAD(&resv_list);
 +      INIT_LIST_HEAD(&duplicates);
 +
 +      /* Get all the page directory BOs that need to be reserved */
 +      i = 0;
 +      list_for_each_entry(peer_vm, &process_info->vm_list_head,
 +                          vm_list_node)
 +              amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
 +                                  &pd_bo_list_entries[i++]);
 +      /* Add the userptr_inval_list entries to resv_list */
 +      list_for_each_entry(mem, &process_info->userptr_inval_list,
 +                          validate_list.head) {
 +              list_add_tail(&mem->resv_list.head, &resv_list);
 +              mem->resv_list.bo = mem->validate_list.bo;
 +              mem->resv_list.shared = mem->validate_list.shared;
 +      }
 +
 +      /* Reserve all BOs and page tables for validation */
 +      ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
 +      WARN(!list_empty(&duplicates), "Duplicates should be empty");
 +      if (ret)
 +              goto out;
 +
 +      amdgpu_sync_create(&sync);
 +
 +      /* Avoid triggering eviction fences when unmapping invalid
 +       * userptr BOs (waits for all fences, doesn't use
 +       * FENCE_OWNER_VM)
 +       */
 +      list_for_each_entry(peer_vm, &process_info->vm_list_head,
 +                          vm_list_node)
 +              amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
 +                                              process_info->eviction_fence,
 +                                              NULL, NULL);
 +
 +      ret = process_validate_vms(process_info);
 +      if (ret)
 +              goto unreserve_out;
 +
 +      /* Validate BOs and update GPUVM page tables */
 +      list_for_each_entry_safe(mem, tmp_mem,
 +                               &process_info->userptr_inval_list,
 +                               validate_list.head) {
 +              struct kfd_bo_va_list *bo_va_entry;
 +
 +              bo = mem->bo;
 +
 +              /* Copy pages array and validate the BO if we got user pages */
 +              if (mem->user_pages[0]) {
 +                      amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
 +                                                   mem->user_pages);
 +                      amdgpu_ttm_placement_from_domain(bo, mem->domain);
 +                      ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 +                      if (ret) {
 +                              pr_err("%s: failed to validate BO\n", __func__);
 +                              goto unreserve_out;
 +                      }
 +              }
 +
 +              /* Validate succeeded, now the BO owns the pages, free
 +               * our copy of the pointer array. Put this BO back on
 +               * the userptr_valid_list. If we need to revalidate
 +               * it, we need to start from scratch.
 +               */
 +              kvfree(mem->user_pages);
 +              mem->user_pages = NULL;
 +              list_move_tail(&mem->validate_list.head,
 +                             &process_info->userptr_valid_list);
 +
 +              /* Update mapping. If the BO was not validated
 +               * (because we couldn't get user pages), this will
 +               * clear the page table entries, which will result in
 +               * VM faults if the GPU tries to access the invalid
 +               * memory.
 +               */
 +              list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
 +                      if (!bo_va_entry->is_mapped)
 +                              continue;
 +
 +                      ret = update_gpuvm_pte((struct amdgpu_device *)
 +                                             bo_va_entry->kgd_dev,
 +                                             bo_va_entry, &sync);
 +                      if (ret) {
 +                              pr_err("%s: update PTE failed\n", __func__);
 +                              /* make sure this gets validated again */
 +                              atomic_inc(&mem->invalid);
 +                              goto unreserve_out;
 +                      }
 +              }
 +      }
 +
 +      /* Update page directories */
 +      ret = process_update_pds(process_info, &sync);
 +
 +unreserve_out:
 +      list_for_each_entry(peer_vm, &process_info->vm_list_head,
 +                          vm_list_node)
 +              amdgpu_bo_fence(peer_vm->root.base.bo,
 +                              &process_info->eviction_fence->base, true);
 +      ttm_eu_backoff_reservation(&ticket, &resv_list);
 +      amdgpu_sync_wait(&sync, false);
 +      amdgpu_sync_free(&sync);
 +out:
 +      kfree(pd_bo_list_entries);
 +
 +      return ret;
 +}
 +
 +/* Worker callback to restore evicted userptr BOs
 + *
 + * Tries to update and validate all userptr BOs. If successful and no
 + * concurrent evictions happened, the queues are restarted. Otherwise,
 + * reschedule for another attempt later.
 + */
 +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 +{
 +      struct delayed_work *dwork = to_delayed_work(work);
 +      struct amdkfd_process_info *process_info =
 +              container_of(dwork, struct amdkfd_process_info,
 +                           restore_userptr_work);
 +      struct task_struct *usertask;
 +      struct mm_struct *mm;
 +      int evicted_bos;
 +
 +      evicted_bos = atomic_read(&process_info->evicted_bos);
 +      if (!evicted_bos)
 +              return;
 +
 +      /* Reference task and mm in case of concurrent process termination */
 +      usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
 +      if (!usertask)
 +              return;
 +      mm = get_task_mm(usertask);
 +      if (!mm) {
 +              put_task_struct(usertask);
 +              return;
 +      }
 +
 +      mutex_lock(&process_info->lock);
 +
 +      if (update_invalid_user_pages(process_info, mm))
 +              goto unlock_out;
 +      /* userptr_inval_list can be empty if all evicted userptr BOs
 +       * have been freed. In that case there is nothing to validate
 +       * and we can just restart the queues.
 +       */
 +      if (!list_empty(&process_info->userptr_inval_list)) {
 +              if (atomic_read(&process_info->evicted_bos) != evicted_bos)
 +                      goto unlock_out; /* Concurrent eviction, try again */
 +
 +              if (validate_invalid_user_pages(process_info))
 +                      goto unlock_out;
 +      }
 +      /* Final check for concurrent eviction and atomic update. If
 +       * another eviction happens after successful update, it will
 +       * be a first eviction that calls quiesce_mm. The eviction
 +       * reference counting inside KFD will handle this case.
 +       */
 +      if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
 +          evicted_bos)
 +              goto unlock_out;
 +      evicted_bos = 0;
 +      if (kgd2kfd->resume_mm(mm)) {
 +              pr_err("%s: Failed to resume KFD\n", __func__);
 +              /* No recovery from this failure. Probably the CP is
 +               * hanging. No point trying again.
 +               */
 +      }
 +unlock_out:
 +      mutex_unlock(&process_info->lock);
 +      mmput(mm);
 +      put_task_struct(usertask);
 +
 +      /* If validation failed, reschedule another attempt */
 +      if (evicted_bos)
 +              schedule_delayed_work(&process_info->restore_userptr_work,
 +                      msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 +}
 +
  /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
   *   KFD process identified by process_info
   *
index 8e66f3702b7cf748c0a8e336255fdcde945ddfd3,e1756b68a17beda71956555e07b5b8595f786e27..9c1d491d742e095c24b9cff843cb9830c3d67753
@@@ -382,8 -382,7 +382,7 @@@ retry
  
        p->bytes_moved += ctx.bytes_moved;
        if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-           bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-           bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
+           amdgpu_bo_in_cpu_visible_vram(bo))
                p->bytes_moved_vis += ctx.bytes_moved;
  
        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@@ -411,7 -410,6 +410,6 @@@ static bool amdgpu_cs_try_evict(struct 
                struct amdgpu_bo_list_entry *candidate = p->evictable;
                struct amdgpu_bo *bo = candidate->robj;
                struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-               u64 initial_bytes_moved, bytes_moved;
                bool update_bytes_moved_vis;
                uint32_t other;
  
                        continue;
  
                /* Good we can try to move this BO somewhere else */
-               amdgpu_ttm_placement_from_domain(bo, other);
                update_bytes_moved_vis =
                        adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-                       bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-                       bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
-               initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
+                       amdgpu_bo_in_cpu_visible_vram(bo);
+               amdgpu_ttm_placement_from_domain(bo, other);
                r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-               bytes_moved = atomic64_read(&adev->num_bytes_moved) -
-                       initial_bytes_moved;
-               p->bytes_moved += bytes_moved;
+               p->bytes_moved += ctx.bytes_moved;
                if (update_bytes_moved_vis)
-                       p->bytes_moved_vis += bytes_moved;
+                       p->bytes_moved_vis += ctx.bytes_moved;
  
                if (unlikely(r))
                        break;
@@@ -536,7 -530,7 +530,7 @@@ static int amdgpu_cs_parser_bos(struct 
        if (p->bo_list) {
                amdgpu_bo_list_get_list(p->bo_list, &p->validated);
                if (p->bo_list->first_userptr != p->bo_list->num_entries)
 -                      p->mn = amdgpu_mn_get(p->adev);
 +                      p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
        }
  
        INIT_LIST_HEAD(&duplicates);
index c713d30cba86854accffcacddefb2218ec319aaf,5e9fd256faada5ad98f8ca4c0e8b083a52322df1..69a2b25b3696e9505a468d8c57509ba99351caa1
@@@ -111,7 -111,7 +111,7 @@@ static int amdgpu_ttm_global_init(struc
        ring = adev->mman.buffer_funcs_ring;
        rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
        r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
-                                 rq, amdgpu_sched_jobs, NULL);
+                                 rq, NULL);
        if (r) {
                DRM_ERROR("Failed setting up TTM BO move run queue.\n");
                goto error_entity;
@@@ -223,20 -223,8 +223,8 @@@ static void amdgpu_evict_flags(struct t
                if (!adev->mman.buffer_funcs_enabled) {
                        amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
                } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-                          !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-                       unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
-                       struct drm_mm_node *node = bo->mem.mm_node;
-                       unsigned long pages_left;
-                       for (pages_left = bo->mem.num_pages;
-                            pages_left;
-                            pages_left -= node->size, node++) {
-                               if (node->start < fpfn)
-                                       break;
-                       }
-                       if (!pages_left)
-                               goto gtt;
+                          !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+                          amdgpu_bo_in_cpu_visible_vram(abo)) {
  
                        /* Try evicting to the CPU inaccessible part of VRAM
                         * first, but only set GTT as busy placement, so this
                         */
                        amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
                                                         AMDGPU_GEM_DOMAIN_GTT);
-                       abo->placements[0].fpfn = fpfn;
+                       abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
                        abo->placements[0].lpfn = 0;
                        abo->placement.busy_placement = &abo->placements[1];
                        abo->placement.num_busy_placement = 1;
                } else {
- gtt:
                        amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
                }
                break;
@@@ -695,7 -682,7 +682,7 @@@ struct amdgpu_ttm_tt 
        struct ttm_dma_tt       ttm;
        u64                     offset;
        uint64_t                userptr;
 -      struct mm_struct        *usermm;
 +      struct task_struct      *usertask;
        uint32_t                userflags;
        spinlock_t              guptasklock;
        struct list_head        guptasks;
  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
  {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 +      struct mm_struct *mm = gtt->usertask->mm;
        unsigned int flags = 0;
        unsigned pinned = 0;
        int r;
  
 +      if (!mm) /* Happens during process shutdown */
 +              return -ESRCH;
 +
        if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
                flags |= FOLL_WRITE;
  
 -      down_read(&current->mm->mmap_sem);
 +      down_read(&mm->mmap_sem);
  
        if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
                /* check that we only use anonymous memory
                unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
                struct vm_area_struct *vma;
  
 -              vma = find_vma(gtt->usermm, gtt->userptr);
 +              vma = find_vma(mm, gtt->userptr);
                if (!vma || vma->vm_file || vma->vm_end < end) {
 -                      up_read(&current->mm->mmap_sem);
 +                      up_read(&mm->mmap_sem);
                        return -EPERM;
                }
        }
                list_add(&guptask.list, &gtt->guptasks);
                spin_unlock(&gtt->guptasklock);
  
 -              r = get_user_pages(userptr, num_pages, flags, p, NULL);
 +              if (mm == current->mm)
 +                      r = get_user_pages(userptr, num_pages, flags, p, NULL);
 +              else
 +                      r = get_user_pages_remote(gtt->usertask,
 +                                      mm, userptr, num_pages,
 +                                      flags, p, NULL, NULL);
  
                spin_lock(&gtt->guptasklock);
                list_del(&guptask.list);
  
        } while (pinned < ttm->num_pages);
  
 -      up_read(&current->mm->mmap_sem);
 +      up_read(&mm->mmap_sem);
        return 0;
  
  release_pages:
        release_pages(pages, pinned);
 -      up_read(&current->mm->mmap_sem);
 +      up_read(&mm->mmap_sem);
        return r;
  }
  
@@@ -856,6 -834,45 +843,45 @@@ static void amdgpu_ttm_tt_unpin_userptr
        sg_free_table(ttm->sg);
  }
  
+ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+                               struct ttm_buffer_object *tbo,
+                               uint64_t flags)
+ {
+       struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
+       struct ttm_tt *ttm = tbo->ttm;
+       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       int r;
+       if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
+               uint64_t page_idx = 1;
+               r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
+                               ttm->pages, gtt->ttm.dma_address, flags);
+               if (r)
+                       goto gart_bind_fail;
+               /* Patch mtype of the second part BO */
+               flags &=  ~AMDGPU_PTE_MTYPE_MASK;
+               flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+               r = amdgpu_gart_bind(adev,
+                               gtt->offset + (page_idx << PAGE_SHIFT),
+                               ttm->num_pages - page_idx,
+                               &ttm->pages[page_idx],
+                               &(gtt->ttm.dma_address[page_idx]), flags);
+       } else {
+               r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+                                    ttm->pages, gtt->ttm.dma_address, flags);
+       }
+ gart_bind_fail:
+       if (r)
+               DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+                         ttm->num_pages, gtt->offset);
+       return r;
+ }
  static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
                                   struct ttm_mem_reg *bo_mem)
  {
@@@ -929,8 -946,7 +955,7 @@@ int amdgpu_ttm_alloc_gart(struct ttm_bu
  
        flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
        gtt->offset = (u64)tmp.start << PAGE_SHIFT;
-       r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages,
-                            bo->ttm->pages, gtt->ttm.dma_address, flags);
+       r = amdgpu_ttm_gart_bind(adev, bo, flags);
        if (unlikely(r)) {
                ttm_bo_mem_put(bo, &tmp);
                return r;
  int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
  {
        struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
-       struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
        uint64_t flags;
        int r;
  
-       if (!gtt)
+       if (!tbo->ttm)
                return 0;
  
-       flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem);
-       r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
-                            gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags);
-       if (r)
-               DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
-                         gtt->ttm.ttm.num_pages, gtt->offset);
+       flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
+       r = amdgpu_ttm_gart_bind(adev, tbo, flags);
        return r;
  }
  
@@@ -987,9 -999,6 +1008,9 @@@ static void amdgpu_ttm_backend_destroy(
  {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
  
 +      if (gtt->usertask)
 +              put_task_struct(gtt->usertask);
 +
        ttm_dma_tt_fini(&gtt->ttm);
        kfree(gtt);
  }
@@@ -1091,13 -1100,8 +1112,13 @@@ int amdgpu_ttm_tt_set_userptr(struct tt
                return -EINVAL;
  
        gtt->userptr = addr;
 -      gtt->usermm = current->mm;
        gtt->userflags = flags;
 +
 +      if (gtt->usertask)
 +              put_task_struct(gtt->usertask);
 +      gtt->usertask = current->group_leader;
 +      get_task_struct(gtt->usertask);
 +
        spin_lock_init(&gtt->guptasklock);
        INIT_LIST_HEAD(&gtt->guptasks);
        atomic_set(&gtt->mmu_invalidations, 0);
@@@ -1113,10 -1117,7 +1134,10 @@@ struct mm_struct *amdgpu_ttm_tt_get_use
        if (gtt == NULL)
                return NULL;
  
 -      return gtt->usermm;
 +      if (gtt->usertask == NULL)
 +              return NULL;
 +
 +      return gtt->usertask->mm;
  }
  
  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
@@@ -1349,6 -1350,7 +1370,7 @@@ static void amdgpu_ttm_fw_reserve_vram_
  static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
  {
        struct ttm_operation_ctx ctx = { false, false };
+       struct amdgpu_bo_param bp;
        int r = 0;
        int i;
        u64 vram_size = adev->gmc.visible_vram_size;
        u64 size = adev->fw_vram_usage.size;
        struct amdgpu_bo *bo;
  
+       memset(&bp, 0, sizeof(bp));
+       bp.size = adev->fw_vram_usage.size;
+       bp.byte_align = PAGE_SIZE;
+       bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+       bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+               AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+       bp.type = ttm_bo_type_kernel;
+       bp.resv = NULL;
        adev->fw_vram_usage.va = NULL;
        adev->fw_vram_usage.reserved_bo = NULL;
  
        if (adev->fw_vram_usage.size > 0 &&
                adev->fw_vram_usage.size <= vram_size) {
  
-               r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
-                                    AMDGPU_GEM_DOMAIN_VRAM,
-                                    AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-                                    AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-                                    ttm_bo_type_kernel, NULL,
+               r = amdgpu_bo_create(adev, &bp,
                                     &adev->fw_vram_usage.reserved_bo);
                if (r)
                        goto error_create;
@@@ -1474,12 -1480,14 +1500,14 @@@ int amdgpu_ttm_init(struct amdgpu_devic
                return r;
        }
  
-       r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
-                                   AMDGPU_GEM_DOMAIN_VRAM,
-                                   &adev->stolen_vga_memory,
-                                   NULL, NULL);
-       if (r)
-               return r;
+       if (adev->gmc.stolen_size) {
+               r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
+                                           AMDGPU_GEM_DOMAIN_VRAM,
+                                           &adev->stolen_vga_memory,
+                                           NULL, NULL);
+               if (r)
+                       return r;
+       }
        DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
                 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
  
        return 0;
  }
  
+ void amdgpu_ttm_late_init(struct amdgpu_device *adev)
+ {
+       amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+ }
  void amdgpu_ttm_fini(struct amdgpu_device *adev)
  {
        if (!adev->mman.initialized)
                return;
  
        amdgpu_ttm_debugfs_fini(adev);
-       amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
        amdgpu_ttm_fw_reserve_vram_fini(adev);
        if (adev->mman.aper_base_kaddr)
                iounmap(adev->mman.aper_base_kaddr);
index 5916cc25e28be5e8b2497e5b663da49f6c4b03d9,ee71c40b392061631fb1976f80f1fafdf1b7add1..75592bd04d6ad65fb1c08cd6e020b0492f7d2af7
@@@ -161,8 -161,38 +161,38 @@@ void amdgpu_ucode_print_rlc_hdr(const s
                          le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
                DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
                          le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
-               DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
-                         le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+               DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+                         le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+               if (version_minor == 1) {
+                       const struct rlc_firmware_header_v2_1 *v2_1 =
+                               container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+                       DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
+                                 le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
+                       DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
+                       DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
+                       DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
+                       DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
+                       DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
+                       DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
+                       DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
+                       DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
+                       DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
+                       DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
+                       DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
+                       DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
+                                 le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
+               }
        } else {
                DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
        }
@@@ -265,6 -295,7 +295,7 @@@ amdgpu_ucode_get_load_type(struct amdgp
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
+       case CHIP_VEGAM:
                if (!load_type)
                        return AMDGPU_FW_LOAD_DIRECT;
                else
                else
                        return AMDGPU_FW_LOAD_PSP;
        default:
 -              DRM_ERROR("Unknow firmware load type\n");
 +              DRM_ERROR("Unknown firmware load type\n");
        }
  
        return AMDGPU_FW_LOAD_DIRECT;
@@@ -307,7 -338,10 +338,10 @@@ static int amdgpu_ucode_init_single_fw(
            (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
             ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
             ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
-            ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) {
+            ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
+            ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
+            ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
+            ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
                ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
  
                memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
                                              le32_to_cpu(header->ucode_array_offset_bytes) +
                                              le32_to_cpu(cp_hdr->jt_offset) * 4),
                       ucode->ucode_size);
+       } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
+               ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
+               memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
+                      ucode->ucode_size);
+       } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) {
+               ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
+               memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm,
+                      ucode->ucode_size);
+       } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+               ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
+               memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
+                      ucode->ucode_size);
        }
  
        return 0;
index e5962e61beb57051bc4b1cc4efa76af1e6777ff6,2c5e2a41632e97779f543b75382e4ea516282084..fc1911834ab582ce5972b4814399fbd72a5c95c0
@@@ -41,7 -41,6 +41,6 @@@
  #define GFX9_MEC_HPD_SIZE 2048
  #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
  #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
- #define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
  
  #define mmPWR_MISC_CNTL_STATUS                                        0x0183
  #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                               0
@@@ -185,6 -184,30 +184,30 @@@ static const struct soc15_reg_golden go
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
  };
  
+ static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
+ {
+       mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+       mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ };
+ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
+ {
+       mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+       mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ };
  #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
  #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
  #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@@ -401,6 -424,27 +424,27 @@@ static void gfx_v9_0_free_microcode(str
        kfree(adev->gfx.rlc.register_list_format);
  }
  
+ static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
+ {
+       const struct rlc_firmware_header_v2_1 *rlc_hdr;
+       rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+       adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+       adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+       adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+       adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+       adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+       adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+       adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+       adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+       adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+       adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+       adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+       adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+       adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+                       le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+ }
  static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
  {
        const char *chip_name;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL;
        unsigned int i = 0;
+       uint16_t version_major;
+       uint16_t version_minor;
  
        DRM_DEBUG("\n");
  
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+       version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+       version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+       if (version_major == 2 && version_minor == 1)
+               adev->gfx.rlc.is_rlc_v2_1 = true;
        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
        adev->gfx.rlc.save_and_restore_offset =
        for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
  
+       if (adev->gfx.rlc.is_rlc_v2_1)
+               gfx_v9_0_init_rlc_ext_microcode(adev);
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
  
+               if (adev->gfx.rlc.is_rlc_v2_1) {
+                       info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+                       info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+                       info->fw = adev->gfx.rlc_fw;
+                       adev->firmware.fw_size +=
+                               ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+                       info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+                       info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+                       info->fw = adev->gfx.rlc_fw;
+                       adev->firmware.fw_size +=
+                               ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+                       info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+                       info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+                       info->fw = adev->gfx.rlc_fw;
+                       adev->firmware.fw_size +=
+                               ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+               }
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
@@@ -1600,6 -1675,7 +1675,7 @@@ static void gfx_v9_0_gpu_init(struct am
  
        gfx_v9_0_setup_rb(adev);
        gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+       adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
  
        /* XXX SH_MEM regs */
        /* where to put LDS, scratch, GPUVM in FSA64 space */
                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
                        WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
-                       tmp = adev->gmc.shared_aperture_start >> 48;
+                       tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+                               (adev->gmc.private_aperture_start >> 48));
+                       tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+                               (adev->gmc.shared_aperture_start >> 48));
                        WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
                }
        }
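
The SH_MEM_BASES programming above now packs both aperture bases into the register with REG_SET_FIELD() instead of writing only the shared base. As a rough illustration of the read-modify-write field-packing idiom, here is a user-space sketch; the field positions and the simplified macro are assumptions for illustration, not the real SH_MEM_BASES layout or the kernel's REG_SET_FIELD() signature:

/*
 * Sketch of the field-packing idiom.  Hypothetical layout:
 * PRIVATE_BASE in bits [15:0], SHARED_BASE in bits [31:16].
 */
#include <stdint.h>
#include <stdio.h>

#define PRIVATE_BASE_SHIFT	0
#define PRIVATE_BASE_MASK	0x0000FFFFu
#define SHARED_BASE_SHIFT	16
#define SHARED_BASE_MASK	0xFFFF0000u

/* simplified stand-in for the kernel's REG_SET_FIELD() */
#define REG_SET_FIELD(reg, field, val) \
	(((reg) & ~field##_MASK) | (((val) << field##_SHIFT) & field##_MASK))

int main(void)
{
	uint64_t private_aperture_start = 0x1000ULL << 48;
	uint64_t shared_aperture_start  = 0x2000ULL << 48;
	uint32_t tmp;

	tmp = REG_SET_FIELD(0, PRIVATE_BASE,
			    (uint32_t)(private_aperture_start >> 48));
	tmp = REG_SET_FIELD(tmp, SHARED_BASE,
			    (uint32_t)(shared_aperture_start >> 48));

	printf("SH_MEM_BASES = 0x%08x\n", tmp);
	return 0;
}
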
@@@ -1708,55 -1787,42 +1787,42 @@@ static void gfx_v9_0_init_csb(struct am
                        adev->gfx.rlc.clear_state_size);
  }
  
- static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
+ static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
                                int indirect_offset,
                                int list_size,
                                int *unique_indirect_regs,
                                int *unique_indirect_reg_count,
-                               int max_indirect_reg_count,
                                int *indirect_start_offsets,
-                               int *indirect_start_offsets_count,
-                               int max_indirect_start_offsets_count)
+                               int *indirect_start_offsets_count)
  {
        int idx;
-       bool new_entry = true;
  
        for (; indirect_offset < list_size; indirect_offset++) {
+               indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+               *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
  
-               if (new_entry) {
-                       new_entry = false;
-                       indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
-                       *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
-                       BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
-               }
+               while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
+                       indirect_offset += 2;
  
-               if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
-                       new_entry = true;
-                       continue;
-               }
+                       /* look for the matching index */
+                       for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
+                               if (unique_indirect_regs[idx] ==
+                                       register_list_format[indirect_offset] ||
+                                       !unique_indirect_regs[idx])
+                                       break;
+                       }
  
-               indirect_offset += 2;
+                       BUG_ON(idx >= *unique_indirect_reg_count);
  
-               /* look for the matching indice */
-               for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
-                       if (unique_indirect_regs[idx] ==
-                               register_list_format[indirect_offset])
-                               break;
-               }
+                       if (!unique_indirect_regs[idx])
+                               unique_indirect_regs[idx] = register_list_format[indirect_offset];
  
-               if (idx >= *unique_indirect_reg_count) {
-                       unique_indirect_regs[*unique_indirect_reg_count] =
-                               register_list_format[indirect_offset];
-                       idx = *unique_indirect_reg_count;
-                       *unique_indirect_reg_count = *unique_indirect_reg_count + 1;
-                       BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
+                       indirect_offset++;
                }
-               register_list_format[indirect_offset] = idx;
        }
  }
  
- static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
+ static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
  {
        int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
        int unique_indirect_reg_count = 0;
        int indirect_start_offsets_count = 0;
  
        int list_size = 0;
-       int i = 0;
+       int i = 0, j = 0;
        u32 tmp = 0;
  
        u32 *register_list_format =
                adev->gfx.rlc.reg_list_format_size_bytes);
  
        /* setup unique_indirect_regs array and indirect_start_offsets array */
-       gfx_v9_0_parse_ind_reg_list(register_list_format,
-                               GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
-                               adev->gfx.rlc.reg_list_format_size_bytes >> 2,
-                               unique_indirect_regs,
-                               &unique_indirect_reg_count,
-                               ARRAY_SIZE(unique_indirect_regs),
-                               indirect_start_offsets,
-                               &indirect_start_offsets_count,
-                               ARRAY_SIZE(indirect_start_offsets));
+       unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
+       gfx_v9_1_parse_ind_reg_list(register_list_format,
+                                   adev->gfx.rlc.reg_list_format_direct_reg_list_length,
+                                   adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+                                   unique_indirect_regs,
+                                   &unique_indirect_reg_count,
+                                   indirect_start_offsets,
+                                   &indirect_start_offsets_count);
  
        /* enable auto inc in case it is disabled */
        tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
                        adev->gfx.rlc.register_restore[i]);
  
-       /* load direct register */
-       WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
-       for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
-               WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
-                       adev->gfx.rlc.register_restore[i]);
        /* load indirect register */
        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
                adev->gfx.rlc.reg_list_format_start);
-       for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
+       /* direct register portion */
+       for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
                        register_list_format[i]);
  
+       /* indirect register portion */
+       while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
+               if (register_list_format[i] == 0xFFFFFFFF) {
+                       WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+                       continue;
+               }
+               WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+               WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+               for (j = 0; j < unique_indirect_reg_count; j++) {
+                       if (register_list_format[i] == unique_indirect_regs[j]) {
+                               WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
+                               break;
+                       }
+               }
+               BUG_ON(j >= unique_indirect_reg_count);
+               i++;
+       }
        /* set save/restore list size */
        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
        list_size = list_size >> 1;
                adev->gfx.rlc.starting_offsets_start);
        for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
-                       indirect_start_offsets[i]);
+                      indirect_start_offsets[i]);
  
        /* load unique indirect regs*/
        for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
-               WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
-                       unique_indirect_regs[i] & 0x3FFFF);
-               WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
-                       unique_indirect_regs[i] >> 20);
+               if (unique_indirect_regs[i] != 0) {
+                       WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
+                              + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
+                              unique_indirect_regs[i] & 0x3FFFF);
+                       WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
+                              + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
+                              unique_indirect_regs[i] >> 20);
+               }
        }
  
        kfree(register_list_format);
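
The save/restore list that gfx_v9_1_parse_ind_reg_list() and the loader loop above walk has, as far as the driver code shows, a direct portion of reg_list_format_direct_reg_list_length dwords followed by indirect records of two header dwords plus one indirect register address, with each indirect block terminated by 0xFFFFFFFF. The following stand-alone approximation of the parse step collects the unique indirect registers and the block start offsets under that assumption (the example data and array sizes are made up):

/*
 * Stand-alone approximation of the indirect-list walk; the record layout
 * (2 dwords + register address, 0xFFFFFFFF terminator) is inferred from
 * the driver code above.
 */
#include <stdint.h>
#include <stdio.h>

#define LIST_END 0xFFFFFFFFu

static void parse_ind_reg_list(const uint32_t *fmt, int start, int size,
			       uint32_t *uniq, int *uniq_count, int uniq_max,
			       int *starts, int *start_count)
{
	int i, j;

	for (i = start; i < size; i++) {
		starts[(*start_count)++] = i;	/* record block start */

		while (fmt[i] != LIST_END) {
			i += 2;			/* skip the two header dwords */

			/* remember this indirect register if it is new */
			for (j = 0; j < *uniq_count; j++)
				if (uniq[j] == fmt[i])
					break;
			if (j == *uniq_count && *uniq_count < uniq_max)
				uniq[(*uniq_count)++] = fmt[i];

			i++;			/* advance past the register */
		}
	}
}

int main(void)
{
	/* two blocks: (1,2,0x100)(3,4,0x200) END, (5,6,0x100) END */
	const uint32_t fmt[] = { 1, 2, 0x100, 3, 4, 0x200, LIST_END,
				 5, 6, 0x100, LIST_END };
	uint32_t uniq[8] = { 0 };
	int starts[8] = { 0 }, nuniq = 0, nstart = 0, i;

	parse_ind_reg_list(fmt, 0, 11, uniq, &nuniq, 8, starts, &nstart);

	for (i = 0; i < nuniq; i++)
		printf("indirect reg 0x%x\n", uniq[i]);
	for (i = 0; i < nstart; i++)
		printf("block starts at dword %d\n", starts[i]);
	return 0;
}
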
@@@ -2010,6 -2098,9 +2098,9 @@@ static void gfx_v9_0_enable_gfx_dynamic
  
  static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
  {
+       if (!adev->gfx.rlc.is_rlc_v2_1)
+               return;
        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
                              AMD_PG_SUPPORT_GFX_SMG |
                              AMD_PG_SUPPORT_GFX_DMG |
                              AMD_PG_SUPPORT_GDS |
                              AMD_PG_SUPPORT_RLC_SMU_HS)) {
                gfx_v9_0_init_csb(adev);
-               gfx_v9_0_init_rlc_save_restore_list(adev);
+               gfx_v9_1_init_rlc_save_restore_list(adev);
                gfx_v9_0_enable_save_restore_machine(adev);
  
-               if (adev->asic_type == CHIP_RAVEN) {
-                       WREG32(mmRLC_JUMP_TABLE_RESTORE,
-                               adev->gfx.rlc.cp_table_gpu_addr >> 8);
-                       gfx_v9_0_init_gfx_power_gating(adev);
-                       if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
-                               gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
-                               gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
-                       } else {
-                               gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
-                               gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
-                       }
-                       if (adev->pg_flags & AMD_PG_SUPPORT_CP)
-                               gfx_v9_0_enable_cp_power_gating(adev, true);
-                       else
-                               gfx_v9_0_enable_cp_power_gating(adev, false);
-               }
+               WREG32(mmRLC_JUMP_TABLE_RESTORE,
+                      adev->gfx.rlc.cp_table_gpu_addr >> 8);
+               gfx_v9_0_init_gfx_power_gating(adev);
        }
  }
  
@@@ -3061,6 -3137,9 +3137,9 @@@ static int gfx_v9_0_hw_fini(void *handl
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;
  
+       amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+                                              AMD_PG_STATE_UNGATE);
        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
  
@@@ -3279,6 -3358,11 +3358,11 @@@ static int gfx_v9_0_late_init(void *han
        if (r)
                return r;
  
+       r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+                                                  AMD_PG_STATE_GATE);
+       if (r)
+               return r;
        return 0;
  }
  
@@@ -3339,8 -3423,7 +3423,7 @@@ static void gfx_v9_0_exit_rlc_safe_mode
  static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
                                                bool enable)
  {
-       /* TODO: double check if we need to perform under safe mdoe */
-       /* gfx_v9_0_enter_rlc_safe_mode(adev); */
+       gfx_v9_0_enter_rlc_safe_mode(adev);
  
        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
                gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
                gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
        }
  
-       /* gfx_v9_0_exit_rlc_safe_mode(adev); */
+       gfx_v9_0_exit_rlc_safe_mode(adev);
  }
  
  static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@@ -3742,7 -3825,7 +3825,7 @@@ static void gfx_v9_0_ring_emit_ib_gfx(s
        }
  
        amdgpu_ring_write(ring, header);
- BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
      BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
        amdgpu_ring_write(ring,
  #ifdef __BIG_ENDIAN
                (2 << 0) |
@@@ -3774,13 -3857,16 +3857,16 @@@ static void gfx_v9_0_ring_emit_fence(st
  {
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+       bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
  
        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-       amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
-                                EOP_TC_ACTION_EN |
-                                EOP_TC_WB_ACTION_EN |
-                                EOP_TC_MD_ACTION_EN |
+       amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+                                              EOP_TC_NC_ACTION_EN) :
+                                             (EOP_TCL1_ACTION_EN |
+                                              EOP_TC_ACTION_EN |
+                                              EOP_TC_WB_ACTION_EN |
+                                              EOP_TC_MD_ACTION_EN)) |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@@ -4137,6 -4223,20 +4223,20 @@@ static void gfx_v9_0_ring_emit_reg_wait
        gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
  }
  
+ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+                                                 uint32_t reg0, uint32_t reg1,
+                                                 uint32_t ref, uint32_t mask)
+ {
+       int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+       if (amdgpu_sriov_vf(ring->adev))
+               gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+                                     ref, mask, 0x20);
+       else
+               amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+                                                          ref, mask);
+ }
  static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state state)
  {
@@@ -4458,6 -4558,7 +4558,7 @@@ static const struct amdgpu_ring_funcs g
        .emit_tmz = gfx_v9_0_ring_emit_tmz,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+       .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
  };
  
  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
        .set_priority = gfx_v9_0_ring_set_priority_compute,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+       .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
  };
  
  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
        .emit_rreg = gfx_v9_0_ring_emit_rreg,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+       .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
  };
  
  static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@@@ -4686,7 -4789,6 +4789,7 @@@ static int gfx_v9_0_get_cu_info(struct 
  
        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
 +      cu_info->simd_per_cu = NUM_SIMD_PER_CU;
  
        return 0;
  }
index f22f7a88ce0fe4b7435f0316cb402b49e02993ea,839a144c1645e20066fe1095df962cfa23b44c7b..8dc29107228fd145ad27331a8cf3b0bda55be655
  #define               EOP_TC_WB_ACTION_EN                     (1 << 15) /* L2 */
  #define               EOP_TCL1_ACTION_EN                      (1 << 16)
  #define               EOP_TC_ACTION_EN                        (1 << 17) /* L2 */
+ #define               EOP_TC_NC_ACTION_EN                     (1 << 19)
  #define               EOP_TC_MD_ACTION_EN                     (1 << 21) /* L2 metadata */
  
  #define               DATA_SEL(x)                             ((x) << 29)
                         * x=1: tmz_end
                         */
  
 +#define       PACKET3_INVALIDATE_TLBS                         0x98
 +#              define PACKET3_INVALIDATE_TLBS_DST_SEL(x)     ((x) << 0)
 +#              define PACKET3_INVALIDATE_TLBS_ALL_HUB(x)     ((x) << 4)
 +#              define PACKET3_INVALIDATE_TLBS_PASID(x)       ((x) << 5)
 +#              define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x)  ((x) << 29)
  #define PACKET3_SET_RESOURCES                         0xA0
  /* 1. header
   * 2. CONTROL
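
The AMDGPU_FENCE_FLAG_TC_WB_ONLY path in gfx_v9_0_ring_emit_fence() (earlier in this series) selects a lighter EOP action set: only the TC writeback and TC_NC actions, instead of also flushing TCL1, TC and TC metadata. A small sketch of that mask selection, reusing the bit positions defined above:

/*
 * Sketch of the EOP cache-action selection; the bit positions match the
 * soc15d.h defines shown above.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EOP_TC_WB_ACTION_EN	(1u << 15)	/* L2 writeback */
#define EOP_TCL1_ACTION_EN	(1u << 16)
#define EOP_TC_ACTION_EN	(1u << 17)	/* L2 */
#define EOP_TC_NC_ACTION_EN	(1u << 19)
#define EOP_TC_MD_ACTION_EN	(1u << 21)	/* L2 metadata */

static uint32_t eop_cache_actions(bool writeback_only)
{
	if (writeback_only)
		return EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;

	return EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
	       EOP_TC_WB_ACTION_EN | EOP_TC_MD_ACTION_EN;
}

int main(void)
{
	printf("wb-only mask:    0x%08x\n", eop_cache_actions(true));
	printf("full-flush mask: 0x%08x\n", eop_cache_actions(false));
	return 0;
}
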
index 73fd48d6c756636e19f9dd70bedf2efa3614ebd3,8fd1b742985acad26dbf3fb747fe962acbb6e0a1..8fd1b742985acad26dbf3fb747fe962acbb6e0a1
mode 100644,100755..100644
@@@ -1081,6 -1081,7 +1081,7 @@@ static const struct amdgpu_ring_funcs v
        .end_use = amdgpu_vce_ring_end_use,
        .emit_wreg = vce_v4_0_emit_wreg,
        .emit_reg_wait = vce_v4_0_emit_reg_wait,
+       .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
  };
  
  static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
index 9cd3566def8d3cee902ee099d9fbad1260e0c9f1,e59357724eac1fcda676714875352c66458ecf92..644b2187507b9845e125214f9e941e9a124a33ef
@@@ -589,7 -589,7 +589,7 @@@ static void disable_dangling_plane(stru
   ******************************************************************************/
  
  struct dc *dc_create(const struct dc_init_data *init_params)
 - {
 +{
        struct dc *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
        unsigned int full_pipe_count;
  
@@@ -936,95 -936,6 +936,6 @@@ bool dc_post_update_surfaces_to_stream(
        return true;
  }
  
- /*
-  * TODO this whole function needs to go
-  *
-  * dc_surface_update is needlessly complex. See if we can just replace this
-  * with a dc_plane_state and follow the atomic model a bit more closely here.
-  */
- bool dc_commit_planes_to_stream(
-               struct dc *dc,
-               struct dc_plane_state **plane_states,
-               uint8_t new_plane_count,
-               struct dc_stream_state *dc_stream,
-               struct dc_state *state)
- {
-       /* no need to dynamically allocate this. it's pretty small */
-       struct dc_surface_update updates[MAX_SURFACES];
-       struct dc_flip_addrs *flip_addr;
-       struct dc_plane_info *plane_info;
-       struct dc_scaling_info *scaling_info;
-       int i;
-       struct dc_stream_update *stream_update =
-                       kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL);
-       if (!stream_update) {
-               BREAK_TO_DEBUGGER();
-               return false;
-       }
-       flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs),
-                           GFP_KERNEL);
-       plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info),
-                            GFP_KERNEL);
-       scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info),
-                              GFP_KERNEL);
-       if (!flip_addr || !plane_info || !scaling_info) {
-               kfree(flip_addr);
-               kfree(plane_info);
-               kfree(scaling_info);
-               kfree(stream_update);
-               return false;
-       }
-       memset(updates, 0, sizeof(updates));
-       stream_update->src = dc_stream->src;
-       stream_update->dst = dc_stream->dst;
-       stream_update->out_transfer_func = dc_stream->out_transfer_func;
-       for (i = 0; i < new_plane_count; i++) {
-               updates[i].surface = plane_states[i];
-               updates[i].gamma =
-                       (struct dc_gamma *)plane_states[i]->gamma_correction;
-               updates[i].in_transfer_func = plane_states[i]->in_transfer_func;
-               flip_addr[i].address = plane_states[i]->address;
-               flip_addr[i].flip_immediate = plane_states[i]->flip_immediate;
-               plane_info[i].color_space = plane_states[i]->color_space;
-               plane_info[i].input_tf = plane_states[i]->input_tf;
-               plane_info[i].format = plane_states[i]->format;
-               plane_info[i].plane_size = plane_states[i]->plane_size;
-               plane_info[i].rotation = plane_states[i]->rotation;
-               plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror;
-               plane_info[i].stereo_format = plane_states[i]->stereo_format;
-               plane_info[i].tiling_info = plane_states[i]->tiling_info;
-               plane_info[i].visible = plane_states[i]->visible;
-               plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha;
-               plane_info[i].dcc = plane_states[i]->dcc;
-               scaling_info[i].scaling_quality = plane_states[i]->scaling_quality;
-               scaling_info[i].src_rect = plane_states[i]->src_rect;
-               scaling_info[i].dst_rect = plane_states[i]->dst_rect;
-               scaling_info[i].clip_rect = plane_states[i]->clip_rect;
-               updates[i].flip_addr = &flip_addr[i];
-               updates[i].plane_info = &plane_info[i];
-               updates[i].scaling_info = &scaling_info[i];
-       }
-       dc_commit_updates_for_stream(
-                       dc,
-                       updates,
-                       new_plane_count,
-                       dc_stream, stream_update, plane_states, state);
-       kfree(flip_addr);
-       kfree(plane_info);
-       kfree(scaling_info);
-       kfree(stream_update);
-       return true;
- }
  struct dc_state *dc_create_state(void)
  {
        struct dc_state *context = kzalloc(sizeof(struct dc_state),
@@@ -1107,9 -1018,6 +1018,6 @@@ static enum surface_update_type get_pla
        if (u->plane_info->color_space != u->surface->color_space)
                update_flags->bits.color_space_change = 1;
  
-       if (u->plane_info->input_tf != u->surface->input_tf)
-               update_flags->bits.input_tf_change = 1;
        if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror)
                update_flags->bits.horizontal_mirror_change = 1;
  
@@@ -1243,12 -1151,20 +1151,20 @@@ static enum surface_update_type det_sur
        if (u->input_csc_color_matrix)
                update_flags->bits.input_csc_change = 1;
  
-       if (update_flags->bits.in_transfer_func_change
-                       || update_flags->bits.input_csc_change) {
+       if (u->coeff_reduction_factor)
+               update_flags->bits.coeff_reduction_change = 1;
+       if (update_flags->bits.in_transfer_func_change) {
                type = UPDATE_TYPE_MED;
                elevate_update_type(&overall_type, type);
        }
  
+       if (update_flags->bits.input_csc_change
+                       || update_flags->bits.coeff_reduction_change) {
+               type = UPDATE_TYPE_FULL;
+               elevate_update_type(&overall_type, type);
+       }
        return overall_type;
  }
  
@@@ -1297,7 -1213,7 +1213,7 @@@ enum surface_update_type dc_check_updat
        type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status);
        if (type == UPDATE_TYPE_FULL)
                for (i = 0; i < surface_count; i++)
-                       updates[i].surface->update_flags.bits.full_update = 1;
+                       updates[i].surface->update_flags.raw = 0xFFFFFFFF;
  
        return type;
  }
@@@ -1375,6 -1291,12 +1291,12 @@@ static void commit_planes_for_stream(st
                                        pipe_ctx->stream_res.abm->funcs->set_abm_level(
                                                        pipe_ctx->stream_res.abm, stream->abm_level);
                        }
+                       if (stream_update && stream_update->periodic_fn_vsync_delta &&
+                                       pipe_ctx->stream_res.tg->funcs->program_vline_interrupt)
+                               pipe_ctx->stream_res.tg->funcs->program_vline_interrupt(
+                                               pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing,
+                                               pipe_ctx->stream->periodic_fn_vsync_delta);
                }
        }
  
index 38e8041b5f0c738c7e958dce36adb22391dd660b,0000000000000000000000000000000000000000..cdb582043b4fc26cc63a64e04eadcd1bd559e752
mode 100644,000000..100644
--- /dev/null
@@@ -1,371 -1,0 +1,371 @@@
-                                     32, NULL);
 +// SPDX-License-Identifier: GPL-2.0+
 +/* Copyright (C) 2014-2018 Broadcom */
 +
 +/**
 + * DOC: Broadcom V3D Graphics Driver
 + *
 + * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs.
 + * For V3D 2.x support, see the VC4 driver.
 + *
 + * Currently only single-core rendering using the binner and renderer
 + * is supported.  The TFU (texture formatting unit) and V3D 4.x's CSD
 + * (compute shader dispatch) are not yet supported.
 + */
 +
 +#include <linux/clk.h>
 +#include <linux/device.h>
 +#include <linux/io.h>
 +#include <linux/module.h>
 +#include <linux/of_platform.h>
 +#include <linux/platform_device.h>
 +#include <linux/pm_runtime.h>
 +#include <drm/drm_fb_cma_helper.h>
 +#include <drm/drm_fb_helper.h>
 +
 +#include "uapi/drm/v3d_drm.h"
 +#include "v3d_drv.h"
 +#include "v3d_regs.h"
 +
 +#define DRIVER_NAME "v3d"
 +#define DRIVER_DESC "Broadcom V3D graphics"
 +#define DRIVER_DATE "20180419"
 +#define DRIVER_MAJOR 1
 +#define DRIVER_MINOR 0
 +#define DRIVER_PATCHLEVEL 0
 +
 +#ifdef CONFIG_PM
 +static int v3d_runtime_suspend(struct device *dev)
 +{
 +      struct drm_device *drm = dev_get_drvdata(dev);
 +      struct v3d_dev *v3d = to_v3d_dev(drm);
 +
 +      v3d_irq_disable(v3d);
 +
 +      clk_disable_unprepare(v3d->clk);
 +
 +      return 0;
 +}
 +
 +static int v3d_runtime_resume(struct device *dev)
 +{
 +      struct drm_device *drm = dev_get_drvdata(dev);
 +      struct v3d_dev *v3d = to_v3d_dev(drm);
 +      int ret;
 +
 +      ret = clk_prepare_enable(v3d->clk);
 +      if (ret != 0)
 +              return ret;
 +
 +      /* XXX: VPM base */
 +
 +      v3d_mmu_set_page_table(v3d);
 +      v3d_irq_enable(v3d);
 +
 +      return 0;
 +}
 +#endif
 +
 +static const struct dev_pm_ops v3d_v3d_pm_ops = {
 +      SET_RUNTIME_PM_OPS(v3d_runtime_suspend, v3d_runtime_resume, NULL)
 +};
 +
 +static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
 +                             struct drm_file *file_priv)
 +{
 +      struct v3d_dev *v3d = to_v3d_dev(dev);
 +      struct drm_v3d_get_param *args = data;
 +      int ret;
 +      static const u32 reg_map[] = {
 +              [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_UIFCFG,
 +              [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_IDENT1,
 +              [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_IDENT2,
 +              [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_IDENT3,
 +              [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_IDENT0,
 +              [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1,
 +              [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2,
 +      };
 +
 +      if (args->pad != 0)
 +              return -EINVAL;
 +
 +      /* Note that DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0, so we need
 +       * to explicitly allow it in the "the register in our
 +       * parameter map" check.
 +       */
 +      if (args->param < ARRAY_SIZE(reg_map) &&
 +          (reg_map[args->param] ||
 +           args->param == DRM_V3D_PARAM_V3D_CORE0_IDENT0)) {
 +              u32 offset = reg_map[args->param];
 +
 +              if (args->value != 0)
 +                      return -EINVAL;
 +
 +              ret = pm_runtime_get_sync(v3d->dev);
 +              if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 &&
 +                  args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) {
 +                      args->value = V3D_CORE_READ(0, offset);
 +              } else {
 +                      args->value = V3D_READ(offset);
 +              }
 +              pm_runtime_mark_last_busy(v3d->dev);
 +              pm_runtime_put_autosuspend(v3d->dev);
 +              return 0;
 +      }
 +
 +      /* Any params that aren't just register reads would go here. */
 +
 +      DRM_DEBUG("Unknown parameter %d\n", args->param);
 +      return -EINVAL;
 +}
 +
 +static int
 +v3d_open(struct drm_device *dev, struct drm_file *file)
 +{
 +      struct v3d_dev *v3d = to_v3d_dev(dev);
 +      struct v3d_file_priv *v3d_priv;
 +      int i;
 +
 +      v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
 +      if (!v3d_priv)
 +              return -ENOMEM;
 +
 +      v3d_priv->v3d = v3d;
 +
 +      for (i = 0; i < V3D_MAX_QUEUES; i++) {
 +              drm_sched_entity_init(&v3d->queue[i].sched,
 +                                    &v3d_priv->sched_entity[i],
 +                                    &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
++                                    NULL);
 +      }
 +
 +      file->driver_priv = v3d_priv;
 +
 +      return 0;
 +}
 +
 +static void
 +v3d_postclose(struct drm_device *dev, struct drm_file *file)
 +{
 +      struct v3d_dev *v3d = to_v3d_dev(dev);
 +      struct v3d_file_priv *v3d_priv = file->driver_priv;
 +      enum v3d_queue q;
 +
 +      for (q = 0; q < V3D_MAX_QUEUES; q++) {
 +              drm_sched_entity_fini(&v3d->queue[q].sched,
 +                                    &v3d_priv->sched_entity[q]);
 +      }
 +
 +      kfree(v3d_priv);
 +}
 +
 +static const struct file_operations v3d_drm_fops = {
 +      .owner = THIS_MODULE,
 +      .open = drm_open,
 +      .release = drm_release,
 +      .unlocked_ioctl = drm_ioctl,
 +      .mmap = v3d_mmap,
 +      .poll = drm_poll,
 +      .read = drm_read,
 +      .compat_ioctl = drm_compat_ioctl,
 +      .llseek = noop_llseek,
 +};
 +
 +/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
 + * protection between clients.  Note that render nodes would be
 + * able to submit CLs that could access BOs from clients authenticated
 + * with the master node.
 + */
 +static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
 +      DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
 +      DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl, DRM_RENDER_ALLOW),
 +      DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl, DRM_RENDER_ALLOW),
 +      DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW),
 +      DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
 +      DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
 +};
 +
 +static const struct vm_operations_struct v3d_vm_ops = {
 +      .fault = v3d_gem_fault,
 +      .open = drm_gem_vm_open,
 +      .close = drm_gem_vm_close,
 +};
 +
 +static struct drm_driver v3d_drm_driver = {
 +      .driver_features = (DRIVER_GEM |
 +                          DRIVER_RENDER |
 +                          DRIVER_PRIME |
 +                          DRIVER_SYNCOBJ),
 +
 +      .open = v3d_open,
 +      .postclose = v3d_postclose,
 +
 +#if defined(CONFIG_DEBUG_FS)
 +      .debugfs_init = v3d_debugfs_init,
 +#endif
 +
 +      .gem_free_object_unlocked = v3d_free_object,
 +      .gem_vm_ops = &v3d_vm_ops,
 +
 +      .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 +      .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 +      .gem_prime_import = drm_gem_prime_import,
 +      .gem_prime_export = drm_gem_prime_export,
 +      .gem_prime_res_obj = v3d_prime_res_obj,
 +      .gem_prime_get_sg_table = v3d_prime_get_sg_table,
 +      .gem_prime_import_sg_table = v3d_prime_import_sg_table,
 +      .gem_prime_mmap = v3d_prime_mmap,
 +
 +      .ioctls = v3d_drm_ioctls,
 +      .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls),
 +      .fops = &v3d_drm_fops,
 +
 +      .name = DRIVER_NAME,
 +      .desc = DRIVER_DESC,
 +      .date = DRIVER_DATE,
 +      .major = DRIVER_MAJOR,
 +      .minor = DRIVER_MINOR,
 +      .patchlevel = DRIVER_PATCHLEVEL,
 +};
 +
 +static const struct of_device_id v3d_of_match[] = {
 +      { .compatible = "brcm,7268-v3d" },
 +      { .compatible = "brcm,7278-v3d" },
 +      {},
 +};
 +MODULE_DEVICE_TABLE(of, v3d_of_match);
 +
 +static int
 +map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name)
 +{
 +      struct resource *res =
 +              platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name);
 +
 +      *regs = devm_ioremap_resource(v3d->dev, res);
 +      return PTR_ERR_OR_ZERO(*regs);
 +}
 +
 +static int v3d_platform_drm_probe(struct platform_device *pdev)
 +{
 +      struct device *dev = &pdev->dev;
 +      struct drm_device *drm;
 +      struct v3d_dev *v3d;
 +      int ret;
 +      u32 ident1;
 +
 +      dev->coherent_dma_mask = DMA_BIT_MASK(36);
 +
 +      v3d = kzalloc(sizeof(*v3d), GFP_KERNEL);
 +      if (!v3d)
 +              return -ENOMEM;
 +      v3d->dev = dev;
 +      v3d->pdev = pdev;
 +      drm = &v3d->drm;
 +
 +      ret = map_regs(v3d, &v3d->bridge_regs, "bridge");
 +      if (ret)
 +              goto dev_free;
 +
 +      ret = map_regs(v3d, &v3d->hub_regs, "hub");
 +      if (ret)
 +              goto dev_free;
 +
 +      ret = map_regs(v3d, &v3d->core_regs[0], "core0");
 +      if (ret)
 +              goto dev_free;
 +
 +      ident1 = V3D_READ(V3D_HUB_IDENT1);
 +      v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 +
 +                  V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV));
 +      v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES);
 +      WARN_ON(v3d->cores > 1); /* multicore not yet implemented */
 +
 +      if (v3d->ver < 41) {
 +              ret = map_regs(v3d, &v3d->gca_regs, "gca");
 +              if (ret)
 +                      goto dev_free;
 +      }
 +
 +      v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr,
 +                                      GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
 +      if (!v3d->mmu_scratch) {
 +              dev_err(dev, "Failed to allocate MMU scratch page\n");
 +              ret = -ENOMEM;
 +              goto dev_free;
 +      }
 +
 +      pm_runtime_use_autosuspend(dev);
 +      pm_runtime_set_autosuspend_delay(dev, 50);
 +      pm_runtime_enable(dev);
 +
 +      ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev);
 +      if (ret)
 +              goto dma_free;
 +
 +      platform_set_drvdata(pdev, drm);
 +      drm->dev_private = v3d;
 +
 +      ret = v3d_gem_init(drm);
 +      if (ret)
 +              goto dev_destroy;
 +
 +      v3d_irq_init(v3d);
 +
 +      ret = drm_dev_register(drm, 0);
 +      if (ret)
 +              goto gem_destroy;
 +
 +      return 0;
 +
 +gem_destroy:
 +      v3d_gem_destroy(drm);
 +dev_destroy:
 +      drm_dev_put(drm);
 +dma_free:
 +      dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
 +dev_free:
 +      kfree(v3d);
 +      return ret;
 +}
 +
 +static int v3d_platform_drm_remove(struct platform_device *pdev)
 +{
 +      struct drm_device *drm = platform_get_drvdata(pdev);
 +      struct v3d_dev *v3d = to_v3d_dev(drm);
 +
 +      drm_dev_unregister(drm);
 +
 +      v3d_gem_destroy(drm);
 +
 +      drm_dev_put(drm);
 +
 +      dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
 +
 +      return 0;
 +}
 +
 +static struct platform_driver v3d_platform_driver = {
 +      .probe          = v3d_platform_drm_probe,
 +      .remove         = v3d_platform_drm_remove,
 +      .driver         = {
 +              .name   = "v3d",
 +              .of_match_table = v3d_of_match,
 +      },
 +};
 +
 +static int __init v3d_drm_register(void)
 +{
 +      return platform_driver_register(&v3d_platform_driver);
 +}
 +
 +static void __exit v3d_drm_unregister(void)
 +{
 +      platform_driver_unregister(&v3d_platform_driver);
 +}
 +
 +module_init(v3d_drm_register);
 +module_exit(v3d_drm_unregister);
 +
 +MODULE_ALIAS("platform:v3d-drm");
 +MODULE_DESCRIPTION("Broadcom V3D DRM Driver");
 +MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
 +MODULE_LICENSE("GPL v2");
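
A note on v3d_get_param_ioctl() above: it uses a sparse, designated-initializer table indexed by parameter, and because DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0 its map entry is also 0, so the "is this parameter mapped" test must allow it explicitly. A small user-space sketch of that lookup pattern (parameter names and offsets here are placeholders, not the V3D register map):

/*
 * Sketch of a sparse param-to-register table where a valid entry may be 0
 * and therefore needs an explicit allow-list check.
 */
#include <stdint.h>
#include <stdio.h>

enum fake_param {
	PARAM_IDENT0,		/* maps to register offset 0 */
	PARAM_IDENT1,
	PARAM_UNMAPPED,		/* intentionally absent from the table */
	PARAM_COUNT,
};

static const uint32_t reg_map[] = {
	[PARAM_IDENT0] = 0x0000,
	[PARAM_IDENT1] = 0x0004,
};

/* Returns 0 and fills *offset on success, -1 for unknown params. */
static int lookup_param(unsigned int param, uint32_t *offset)
{
	if (param < sizeof(reg_map) / sizeof(reg_map[0]) &&
	    (reg_map[param] || param == PARAM_IDENT0)) {
		*offset = reg_map[param];
		return 0;
	}
	return -1;
}

int main(void)
{
	uint32_t off;
	unsigned int p;

	for (p = 0; p < PARAM_COUNT; p++) {
		if (lookup_param(p, &off) == 0)
			printf("param %u -> reg 0x%04x\n", p, off);
		else
			printf("param %u -> unknown\n", p);
	}
	return 0;
}
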