Merge tag 'amd-drm-next-5.19-2022-04-29' of https://gitlab.freedesktop.org/agd5f...
author     Dave Airlie <airlied@redhat.com>
           Fri, 6 May 2022 05:05:27 +0000 (15:05 +1000)
committer  Dave Airlie <airlied@redhat.com>
           Fri, 6 May 2022 05:05:27 +0000 (15:05 +1000)
amd-drm-next-5.19-2022-04-29:

amdgpu:
- RAS updates
- SI dpm deadlock fix
- Misc code cleanups
- HDCP fixes
- PSR fixes
- DSC fixes
- SDMA doorbell cleanups
- S0ix fix
- DC FP fix
- Xen dom0 regression fix for APUs
- IP discovery updates
- Initial SoC21 support
- Support for new vbios tables
- Runtime PM fixes
- Add PSP TA debugfs interface

amdkfd:
- Misc code cleanups
- Ignore bogus MEC signals more efficiently
- SVM fixes
- Use bitmap helpers

radeon:
- Misc code cleanups
- Spelling/grammar fixes

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220429144853.5742-1-alexander.deucher@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 01853431249dfd051a40ddc15224f6e204eb7249,67bd506fa141b9f02c60d01684f9700b15dedca2..2982b543c27fd58a9fa5e2234bd0730f31f2fdfa
@@@ -55,8 -55,8 +55,8 @@@ static int amdgpu_cs_user_fence_chunk(s
        bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
        p->uf_entry.priority = 0;
        p->uf_entry.tv.bo = &bo->tbo;
 -      /* One for TTM and one for the CS job */
 -      p->uf_entry.tv.num_shared = 2;
 +      /* One for TTM and two for the CS job */
 +      p->uf_entry.tv.num_shared = 3;
  
        drm_gem_object_put(gobj);
  
@@@ -128,8 -128,6 +128,8 @@@ static int amdgpu_cs_parser_init(struc
                goto free_chunk;
        }
  
 +      mutex_lock(&p->ctx->lock);
 +
        /* skip guilty context job */
        if (atomic_read(&p->ctx->guilty) == 1) {
                ret = -ECANCELED;
@@@ -552,7 -550,7 +552,7 @@@ static int amdgpu_cs_parser_bos(struct 
                if (r) {
                        kvfree(e->user_pages);
                        e->user_pages = NULL;
-                       return r;
+                       goto out_free_user_pages;
                }
  
                for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-               goto out;
+               goto out_free_user_pages;
        }
  
        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
  
                e->bo_va = amdgpu_vm_bo_find(vm, bo);
 -
 -              if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
 -                      e->chain = dma_fence_chain_alloc();
 -                      if (!e->chain) {
 -                              r = -ENOMEM;
 -                              goto error_validate;
 -                      }
 -              }
        }
  
        /* Move fence waiting after getting reservation lock of
        }
  
  error_validate:
 -      if (r) {
 -              amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 -                      dma_fence_chain_free(e->chain);
 -                      e->chain = NULL;
 -              }
 +      if (r)
                ttm_eu_backoff_reservation(&p->ticket, &p->validated);
- out:
 -      }
+ out_free_user_pages:
+       if (r) {
+               amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+                       struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+                       if (!e->user_pages)
+                               continue;
+                       amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+                       kvfree(e->user_pages);
+                       e->user_pages = NULL;
+               }
+       }
        return r;
  }
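
The out_free_user_pages label fixes a leak in the old error paths: an early return after a failed validation left pages that had already been pinned for other userptr BOs in the list unreleased. A minimal sketch of the unwind pattern, with hypothetical pin_pages()/unpin_pages() standing in for the amdgpu HMM helpers:

	int r = 0;

	list_for_each_entry(e, &userptr_list, head) {
		r = pin_pages(e);		/* stand-in for the HMM pin */
		if (r)
			goto out_free_user_pages;
	}
	return 0;

out_free_user_pages:
	list_for_each_entry(e, &userptr_list, head) {
		if (!e->user_pages)
			continue;		/* never pinned, nothing to undo */
		unpin_pages(e);			/* stand-in for ..._user_pages_done() */
		kvfree(e->user_pages);
		e->user_pages = NULL;
	}
	return r;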
  
@@@ -677,9 -700,17 +689,9 @@@ static void amdgpu_cs_parser_fini(struc
  {
        unsigned i;
  
 -      if (error && backoff) {
 -              struct amdgpu_bo_list_entry *e;
 -
 -              amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
 -                      dma_fence_chain_free(e->chain);
 -                      e->chain = NULL;
 -              }
 -
 +      if (error && backoff)
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
 -      }
  
        for (i = 0; i < parser->num_post_deps; i++) {
                drm_syncobj_put(parser->post_deps[i].syncobj);
        dma_fence_put(parser->fence);
  
        if (parser->ctx) {
 +              mutex_unlock(&parser->ctx->lock);
                amdgpu_ctx_put(parser->ctx);
        }
        if (parser->bo_list)
@@@ -1139,9 -1169,6 +1151,9 @@@ static int amdgpu_cs_dependencies(struc
  {
        int i, r;
  
 +      /* TODO: Investigate why we still need the context lock */
 +      mutex_unlock(&p->ctx->lock);
 +
        for (i = 0; i < p->nchunks; ++i) {
                struct amdgpu_cs_chunk *chunk;
  
                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
                        r = amdgpu_cs_process_fence_dep(p, chunk);
                        if (r)
 -                              return r;
 +                              goto out;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
                        r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
                        if (r)
 -                              return r;
 +                              goto out;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                        r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
                        if (r)
 -                              return r;
 +                              goto out;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
                        r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
                        if (r)
 -                              return r;
 +                              goto out;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
                        r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
                        if (r)
 -                              return r;
 +                              goto out;
                        break;
                }
        }
  
 -      return 0;
 +out:
 +      mutex_lock(&p->ctx->lock);
 +      return r;
  }
  
  static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@@ -1259,9 -1284,24 +1271,9 @@@ static int amdgpu_cs_submit(struct amdg
  
        amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
  
 -      amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 -              struct dma_resv *resv = e->tv.bo->base.resv;
 -              struct dma_fence_chain *chain = e->chain;
 -
 -              if (!chain)
 -                      continue;
 -
 -              /*
 -               * Work around dma_resv shortcomings by wrapping up the
 -               * submission in a dma_fence_chain and add it as exclusive
 -               * fence.
 -               */
 -              dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
 -                                   dma_fence_get(p->fence), 1);
 -
 -              rcu_assign_pointer(resv->fence_excl, &chain->base);
 -              e->chain = NULL;
 -      }
 +      /* Make sure all BOs are remembered as writers */
 +      amdgpu_bo_list_for_each_entry(e, p->bo_list)
 +              e->tv.num_shared = 0;
  
        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        mutex_unlock(&p->adev->notifier_lock);
@@@ -1340,7 -1380,6 +1352,7 @@@ int amdgpu_cs_ioctl(struct drm_device *
                goto out;
  
        r = amdgpu_cs_submit(&parser, cs);
 +
  out:
        amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
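
Taken together, the amdgpu_cs.c hunks are one logical change: the dma_fence_chain workaround for dma_resv's single exclusive-fence slot is dropped in favour of reserving enough fence slots up front (num_shared is bumped from 2 to 3 in the first hunk; ttm_eu_reserve_buffers() pre-reserves that many on each BO's reservation object) and then marking every BO as written at submission time. A hedged sketch of the resulting flow, assuming the 5.19 dma_resv/TTM execbuf-util semantics:

	/* Sketch only; locking and error handling elided. */
	amdgpu_bo_list_for_each_entry(e, p->bo_list)
		e->tv.num_shared = 0;	/* 0 => attach the fence as a writer */

	/* Attaches p->fence to every reserved BO with the usage implied
	 * by num_shared, then drops the reservation ticket. */
	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);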
  
index 7e3a7fcb9fe60036f2a52ae496650cc992c419d6,43cd47723946504316a67597246f0f96a72240ee..997650d597eca47d21465cfbb9c4031fb34428b0
@@@ -24,7 -24,6 +24,7 @@@
  #include <linux/hmm.h>
  #include <linux/dma-direction.h>
  #include <linux/dma-mapping.h>
 +#include <linux/migrate.h>
  #include "amdgpu_sync.h"
  #include "amdgpu_object.h"
  #include "amdgpu_vm.h"
@@@ -222,6 -221,7 +222,6 @@@ svm_migrate_get_vram_page(struct svm_ra
        page = pfn_to_page(pfn);
        svm_range_bo_ref(prange->svm_bo);
        page->zone_device_data = prange->svm_bo;
 -      get_page(page);
        lock_page(page);
  }
  
@@@ -410,7 -410,6 +410,6 @@@ svm_migrate_vma_to_vram(struct amdgpu_d
        struct migrate_vma migrate;
        unsigned long cpages = 0;
        dma_addr_t *scratch;
-       size_t size;
        void *buf;
        int r = -ENOMEM;
  
        migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
        migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
  
-       size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
-       size *= npages;
-       buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+       buf = kvcalloc(npages,
+                      2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+                      GFP_KERNEL);
        if (!buf)
                goto out;
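
Both kvmalloc-to-kvcalloc conversions in this file (here and in svm_migrate_vma_to_ram below, plus one in kfd_svm.c) replace an open-coded size multiplication with an allocator that checks the product for overflow and zeroes the result, making __GFP_ZERO and the size variable redundant. Illustrated with elem_size as hypothetical shorthand for the per-page bookkeeping size:

	/* Before: npages * elem_size can wrap before kvmalloc() sees it,
	 * silently yielding a short allocation. */
	buf = kvmalloc(npages * elem_size, GFP_KERNEL | __GFP_ZERO);

	/* After: kvcalloc() fails cleanly on multiplication overflow and
	 * returns zeroed memory, like calloc(). */
	buf = kvcalloc(npages, elem_size, GFP_KERNEL);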
  
@@@ -665,7 -664,6 +664,6 @@@ svm_migrate_vma_to_ram(struct amdgpu_de
        struct dma_fence *mfence = NULL;
        struct migrate_vma migrate;
        dma_addr_t *scratch;
-       size_t size;
        void *buf;
        int r = -ENOMEM;
  
        migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
        migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
  
-       size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
-       size *= npages;
-       buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+       buf = kvcalloc(npages,
+                      2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+                      GFP_KERNEL);
        if (!buf)
                goto out;
  
index bfb3b5c288cbdc7c132b76db9011ed2f2a7a2147,b9ca957246dce8875162d30f054b8a328daeb313..10bb3bb46246bd36367aaed08196d9582c426465
@@@ -26,7 -26,6 +26,7 @@@
  
  #include <linux/hashtable.h>
  #include <linux/mmu_notifier.h>
 +#include <linux/memremap.h>
  #include <linux/mutex.h>
  #include <linux/types.h>
  #include <linux/atomic.h>
@@@ -272,6 -271,7 +272,7 @@@ struct kfd_dev 
  
        struct kgd2kfd_shared_resources shared_resources;
        struct kfd_vmid_info vm_info;
+       struct kfd_local_mem_info local_mem_info;
  
        const struct kfd2kgd_calls *kfd2kgd;
        struct mutex doorbell_mutex;
@@@ -1295,7 -1295,7 +1296,7 @@@ extern const struct kfd_event_interrupt
  
  extern const struct kfd_device_global_init_class device_global_init_class_cik;
  
- void kfd_event_init_process(struct kfd_process *p);
+ int kfd_event_init_process(struct kfd_process *p);
  void kfd_event_free_process(struct kfd_process *p);
  int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
  int kfd_wait_on_events(struct kfd_process *p,
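
The void-to-int change to kfd_event_init_process() lets per-process event setup report failure instead of continuing silently, so callers are expected to check the result. A hypothetical call-site sketch (the real caller lives in the process-creation path):

	err = kfd_event_init_process(p);
	if (err)
		goto err_event_init;	/* unwind earlier per-process setup */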
index 11b395b90a3dcd8cfd9139e6d7f0e345328cce25,5ed8d9b549a47c5bdf9629763f492c33c7625ac8..29e9ebf6d8d5089a2635a1a4ac26645c25158c71
@@@ -149,8 -149,7 +149,7 @@@ svm_range_dma_map_dev(struct amdgpu_dev
        int i, r;
  
        if (!addr) {
-               addr = kvmalloc_array(prange->npages, sizeof(*addr),
-                                     GFP_KERNEL | __GFP_ZERO);
+               addr = kvcalloc(prange->npages, sizeof(*addr), GFP_KERNEL);
                if (!addr)
                        return -ENOMEM;
                prange->dma_addr[gpuidx] = addr;
@@@ -548,7 -547,7 +547,7 @@@ svm_range_vram_node_new(struct amdgpu_d
                goto reserve_bo_failed;
        }
  
 -      r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
 +      r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
        if (r) {
                pr_debug("failed %d to reserve bo\n", r);
                amdgpu_bo_unreserve(bo);
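
dma_resv_reserve_shared() became dma_resv_reserve_fences() in the 5.19 dma_resv rework: there is no longer a distinct shared-fence array, only a single fence container whose slots must be reserved before any fence is added. Minimal usage under that assumption:

	/* Reserve one slot so a later fence add on this reservation
	 * object cannot fail with -ENOMEM. */
	r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
	if (r)
		goto reserve_bo_failed;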
@@@ -686,7 -685,8 +685,8 @@@ svm_range_check_attr(struct kfd_proces
  
  static void
  svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
-                     uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+                     uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+                     bool *update_mapping)
  {
        uint32_t i;
        int gpuidx;
                case KFD_IOCTL_SVM_ATTR_ACCESS:
                case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
                case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+                       *update_mapping = true;
                        gpuidx = kfd_process_gpuidx_from_gpuid(p,
                                                               attrs[i].value);
                        if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
                        }
                        break;
                case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+                       *update_mapping = true;
                        prange->flags |= attrs[i].value;
                        break;
                case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+                       *update_mapping = true;
                        prange->flags &= ~attrs[i].value;
                        break;
                case KFD_IOCTL_SVM_ATTR_GRANULARITY:
@@@ -951,6 -954,7 +954,7 @@@ svm_range_split_adjust(struct svm_rang
        new->prefetch_loc = old->prefetch_loc;
        new->actual_loc = old->actual_loc;
        new->granularity = old->granularity;
+       new->mapped_to_gpu = old->mapped_to_gpu;
        bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
        bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
  
@@@ -1204,6 -1208,17 +1208,17 @@@ svm_range_unmap_from_gpus(struct svm_ra
        uint32_t gpuidx;
        int r = 0;
  
+       if (!prange->mapped_to_gpu) {
+               pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
+                        prange, prange->start, prange->last);
+               return 0;
+       }
+       if (prange->start == start && prange->last == last) {
+               pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
+               prange->mapped_to_gpu = false;
+       }
        bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
                  MAX_GPU_INSTANCE);
        p = container_of(prange->svms, struct kfd_process, svms);
@@@ -1239,7 -1254,7 +1254,7 @@@ static in
  svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
                     unsigned long offset, unsigned long npages, bool readonly,
                     dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
-                    struct dma_fence **fence)
+                    struct dma_fence **fence, bool flush_tlb)
  {
        struct amdgpu_device *adev = pdd->dev->adev;
        struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
                         (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
                         pte_flags);
  
-               r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
+               r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
                                           last_start, prange->start + i,
                                           pte_flags,
                                           last_start - prange->start,
@@@ -1311,7 -1326,7 +1326,7 @@@ out
  static int
  svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
                      unsigned long npages, bool readonly,
-                     unsigned long *bitmap, bool wait)
+                     unsigned long *bitmap, bool wait, bool flush_tlb)
  {
        struct kfd_process_device *pdd;
        struct amdgpu_device *bo_adev;
  
                r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
                                         prange->dma_addr[gpuidx],
-                                        bo_adev, wait ? &fence : NULL);
+                                        bo_adev, wait ? &fence : NULL,
+                                        flush_tlb);
                if (r)
                        break;
  
@@@ -1467,8 -1483,8 +1483,8 @@@ static void *kfd_svm_page_owner(struct 
   * 5. Release page table (and SVM BO) reservation
   */
  static int svm_range_validate_and_map(struct mm_struct *mm,
-                                     struct svm_range *prange,
-                                     int32_t gpuidx, bool intr, bool wait)
+                                     struct svm_range *prange, int32_t gpuidx,
+                                     bool intr, bool wait, bool flush_tlb)
  {
        struct svm_validate_context ctx;
        unsigned long start, end, addr;
                          prange->bitmap_aip, MAX_GPU_INSTANCE);
        }
  
-       if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
-               return 0;
+       if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
+               if (!prange->mapped_to_gpu)
+                       return 0;
+               bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+       }
  
        if (prange->actual_loc && !prange->ttm_res) {
                /* This should never happen. actual_loc gets set by
                }
  
                r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-                                         ctx.bitmap, wait);
+                                         ctx.bitmap, wait, flush_tlb);
  
  unlock_out:
                svm_range_unlock(prange);
                addr = next;
        }
  
-       if (addr == end)
+       if (addr == end) {
                prange->validated_once = true;
+               prange->mapped_to_gpu = true;
+       }
  
  unreserve_out:
        svm_range_unreserve_bos(&ctx);
@@@ -1674,7 -1696,7 +1696,7 @@@ static void svm_range_restore_work(stru
                mutex_lock(&prange->migrate_mutex);
  
                r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-                                              false, true);
+                                              false, true, false);
                if (r)
                        pr_debug("failed %d to map 0x%lx to gpus\n", r,
                                 prange->start);
@@@ -1820,6 -1842,7 +1842,7 @@@ static struct svm_range *svm_range_clon
        new->prefetch_loc = old->prefetch_loc;
        new->actual_loc = old->actual_loc;
        new->granularity = old->granularity;
+       new->mapped_to_gpu = old->mapped_to_gpu;
        bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
        bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
  
@@@ -2811,7 -2834,7 +2834,7 @@@ retry_write_locked
                }
        }
  
-       r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
+       r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
        if (r)
                pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
                         r, svms, prange->start, prange->last);
@@@ -3224,6 -3247,8 +3247,8 @@@ svm_range_set_attr(struct kfd_process *
        struct svm_range_list *svms;
        struct svm_range *prange;
        struct svm_range *next;
+       bool update_mapping = false;
+       bool flush_tlb;
        int r = 0;
  
        pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
                svm_range_add_notifier_locked(mm, prange);
        }
        list_for_each_entry(prange, &update_list, update_list) {
-               svm_range_apply_attrs(p, prange, nattr, attrs);
+               svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
                /* TODO: unmap ranges from GPU that lost access */
        }
        list_for_each_entry_safe(prange, next, &remove_list, update_list) {
                        continue;
                }
  
+               if (!migrated && !update_mapping) {
+                       mutex_unlock(&prange->migrate_mutex);
+                       continue;
+               }
+               flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
                r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-                                              true, true);
+                                              true, true, flush_tlb);
                if (r)
                        pr_debug("failed %d to map svm range\n", r);