bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
p->uf_entry.priority = 0;
p->uf_entry.tv.bo = &bo->tbo;
- /* One for TTM and one for the CS job */
- p->uf_entry.tv.num_shared = 2;
+ /* One for TTM and two for the CS job */
+ p->uf_entry.tv.num_shared = 3;
drm_gem_object_put(gobj);
goto free_chunk;
}
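+ /* Released again in amdgpu_cs_parser_fini() */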
+ mutex_lock(&p->ctx->lock);
+
/* skip guilty context job */
if (atomic_read(&p->ctx->guilty) == 1) {
ret = -ECANCELED;
if (r) {
kvfree(e->user_pages);
e->user_pages = NULL;
- return r;
+ goto out_free_user_pages;
}
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
- goto out;
+ goto out_free_user_pages;
}
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
e->bo_va = amdgpu_vm_bo_find(vm, bo);
-
- if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
- e->chain = dma_fence_chain_alloc();
- if (!e->chain) {
- r = -ENOMEM;
- goto error_validate;
- }
- }
}
/* Move fence waiting after getting reservation lock of
}
error_validate:
- if (r) {
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- dma_fence_chain_free(e->chain);
- e->chain = NULL;
- }
+ if (r)
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
- }
- out:
+
+ out_free_user_pages:
+ if (r) {
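+ /* Release the user page arrays gathered for userptr BOs */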
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+ if (!e->user_pages)
+ continue;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
+ }
+ }
return r;
}
{
unsigned i;
- if (error && backoff) {
- struct amdgpu_bo_list_entry *e;
-
- amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
- dma_fence_chain_free(e->chain);
- e->chain = NULL;
- }
-
+ if (error && backoff)
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
- }
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
dma_fence_put(parser->fence);
if (parser->ctx) {
+ mutex_unlock(&parser->ctx->lock);
amdgpu_ctx_put(parser->ctx);
}
if (parser->bo_list)
{
int i, r;
+ /* TODO: Investigate why we still need the context lock */
+ mutex_unlock(&p->ctx->lock);
+
for (i = 0; i < p->nchunks; ++i) {
struct amdgpu_cs_chunk *chunk;
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
}
}
- return 0;
+ r = 0;
+out:
+ mutex_lock(&p->ctx->lock);
+ return r;
}
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct dma_resv *resv = e->tv.bo->base.resv;
- struct dma_fence_chain *chain = e->chain;
-
- if (!chain)
- continue;
-
- /*
- * Work around dma_resv shortcomings by wrapping up the
- * submission in a dma_fence_chain and add it as exclusive
- * fence.
- */
- dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
- dma_fence_get(p->fence), 1);
-
- rcu_assign_pointer(resv->fence_excl, &chain->base);
- e->chain = NULL;
- }
+ /* Make sure all BOs are remembered as writers */
+ amdgpu_bo_list_for_each_entry(e, p->bo_list)
+ e->tv.num_shared = 0;
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
goto out;
r = amdgpu_cs_submit(&parser, cs);
+
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
int i, r;
if (!addr) {
- addr = kvmalloc_array(prange->npages, sizeof(*addr),
- GFP_KERNEL | __GFP_ZERO);
+ addr = kvcalloc(prange->npages, sizeof(*addr), GFP_KERNEL);
if (!addr)
return -ENOMEM;
prange->dma_addr[gpuidx] = addr;
goto reserve_bo_failed;
}
- r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
+ r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
if (r) {
pr_debug("failed %d to reserve bo\n", r);
amdgpu_bo_unreserve(bo);
static void
svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+ bool *update_mapping)
{
uint32_t i;
int gpuidx;
case KFD_IOCTL_SVM_ATTR_ACCESS:
case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ *update_mapping = true;
gpuidx = kfd_process_gpuidx_from_gpuid(p,
attrs[i].value);
if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
}
break;
case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ *update_mapping = true;
prange->flags |= attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ *update_mapping = true;
prange->flags &= ~attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_GRANULARITY:
uint32_t gpuidx;
int r = 0;
+ if (!prange->mapped_to_gpu) {
+ pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
+ prange, prange->start, prange->last);
+ return 0;
+ }
+
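+ /* Only clear mapped_to_gpu when the entire range is unmapped */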
+ if (prange->start == start && prange->last == last) {
+ pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
+ prange->mapped_to_gpu = false;
+ }
+
bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
MAX_GPU_INSTANCE);
p = container_of(prange->svms, struct kfd_process, svms);
svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
unsigned long offset, unsigned long npages, bool readonly,
dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
- struct dma_fence **fence)
+ struct dma_fence **fence, bool flush_tlb)
{
struct amdgpu_device *adev = pdd->dev->adev;
struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
pte_flags);
- r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
last_start, prange->start + i,
pte_flags,
last_start - prange->start,
static int
svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
unsigned long npages, bool readonly,
- unsigned long *bitmap, bool wait)
+ unsigned long *bitmap, bool wait, bool flush_tlb)
{
struct kfd_process_device *pdd;
struct amdgpu_device *bo_adev;
r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
prange->dma_addr[gpuidx],
- bo_adev, wait ? &fence : NULL);
+ bo_adev, wait ? &fence : NULL,
+ flush_tlb);
if (r)
break;
* 5. Release page table (and SVM BO) reservation
*/
static int svm_range_validate_and_map(struct mm_struct *mm,
- struct svm_range *prange,
- int32_t gpuidx, bool intr, bool wait)
+ struct svm_range *prange, int32_t gpuidx,
+ bool intr, bool wait, bool flush_tlb)
{
struct svm_validate_context ctx;
unsigned long start, end, addr;
prange->bitmap_aip, MAX_GPU_INSTANCE);
}
- if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
- return 0;
+ if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
+ if (!prange->mapped_to_gpu)
+ return 0;
+
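+ /* If already mapped, keep updating the GPUs that have access */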
+ bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+ }
if (prange->actual_loc && !prange->ttm_res) {
/* This should never happen. actual_loc gets set by
}
r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx.bitmap, wait);
+ ctx.bitmap, wait, flush_tlb);
unlock_out:
svm_range_unlock(prange);
addr = next;
}
- if (addr == end)
+ if (addr == end) {
prange->validated_once = true;
+ prange->mapped_to_gpu = true;
+ }
unreserve_out:
svm_range_unreserve_bos(&ctx);
mutex_lock(&prange->migrate_mutex);
r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
- false, true);
+ false, true, false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
prange->start);
new->prefetch_loc = old->prefetch_loc;
new->actual_loc = old->actual_loc;
new->granularity = old->granularity;
+ new->mapped_to_gpu = old->mapped_to_gpu;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
}
}
- r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
+ r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, prange->start, prange->last);
struct svm_range_list *svms;
struct svm_range *prange;
struct svm_range *next;
+ bool update_mapping = false;
+ bool flush_tlb;
int r = 0;
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
svm_range_add_notifier_locked(mm, prange);
}
list_for_each_entry(prange, &update_list, update_list) {
- svm_range_apply_attrs(p, prange, nattr, attrs);
+ svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
/* TODO: unmap ranges from GPU that lost access */
}
list_for_each_entry_safe(prange, next, &remove_list, update_list) {
continue;
}
+ if (!migrated && !update_mapping) {
+ mutex_unlock(&prange->migrate_mutex);
+ continue;
+ }
+
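+ /* Flush TLBs only when an existing GPU mapping is updated in place */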
+ flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
+
r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
- true, true);
+ true, true, flush_tlb);
if (r)
pr_debug("failed %d to map svm range\n", r);