drm/ttm: Fix multihop assert on eviction.
author Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Tue, 22 Jun 2021 16:23:39 +0000 (12:23 -0400)
committer Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Wed, 23 Jun 2021 18:59:39 +0000 (14:59 -0400)
Problem:
Under memory pressure, when the GTT domain is almost full, the multihop
assert triggers when trying to evict an LRU BO from VRAM to SYSTEM.

Fix:
Don't assert on a multihop error in the eviction code; instead, bounce the
BO through the temporary placement and retry, as ttm_bo_move_buffer does.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210622162339.761651-6-andrey.grodzovsky@amd.com
drivers/gpu/drm/ttm/ttm_bo.c

index db53fecca696a56f1b5c8d3e3b5060f003edb7b7..6c78149360c42a058c36d39b36fd9f9cfc07d7bf 100644 (file)
@@ -485,6 +485,31 @@ void ttm_bo_unlock_delayed_workqueue(struct ttm_device *bdev, int resched)
 }
 EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue);
 
+static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
+                                    struct ttm_resource **mem, /* not referenced in this body */
+                                    struct ttm_operation_ctx *ctx,
+                                    struct ttm_place *hop)
+{
+       struct ttm_placement hop_placement;
+       struct ttm_resource *hop_mem;
+       int ret;
+
+       hop_placement.num_placement = hop_placement.num_busy_placement = 1;
+       hop_placement.placement = hop_placement.busy_placement = hop;
+
+       /* find space in the bounce domain: @hop is the only (busy) placement offered */
+       ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx);
+       if (ret)
+               return ret;
+       /* move the BO into the bounce domain; if that fails, hop_mem is freed below */
+       ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL);
+       if (ret) {
+               ttm_resource_free(bo, &hop_mem);
+               return ret;
+       }
+       return 0;
+}
+
 static int ttm_bo_evict(struct ttm_buffer_object *bo,
                        struct ttm_operation_ctx *ctx)
 {
@@ -524,12 +549,17 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
                goto out;
        }
 
+bounce:
        ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
-       if (unlikely(ret)) {
-               WARN(ret == -EMULTIHOP, "Unexpected multihop in eviction - likely driver bug\n");
-               if (ret != -ERESTARTSYS)
+       if (ret == -EMULTIHOP) {
+               ret = ttm_bo_bounce_temp_buffer(bo, &evict_mem, ctx, &hop);
+               if (ret) {
                        pr_err("Buffer eviction failed\n");
-               ttm_resource_free(bo, &evict_mem);
+                       ttm_resource_free(bo, &evict_mem);
+                       goto out;
+               }
+               /* try and move to final place now. */
+               goto bounce;
        }
 out:
        return ret;
@@ -844,31 +874,6 @@ error:
 }
 EXPORT_SYMBOL(ttm_bo_mem_space);
 
-static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
-                                    struct ttm_resource **mem,
-                                    struct ttm_operation_ctx *ctx,
-                                    struct ttm_place *hop)
-{
-       struct ttm_placement hop_placement;
-       struct ttm_resource *hop_mem;
-       int ret;
-
-       hop_placement.num_placement = hop_placement.num_busy_placement = 1;
-       hop_placement.placement = hop_placement.busy_placement = hop;
-
-       /* find space in the bounce domain */
-       ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx);
-       if (ret)
-               return ret;
-       /* move to the bounce domain */
-       ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL);
-       if (ret) {
-               ttm_resource_free(bo, &hop_mem);
-               return ret;
-       }
-       return 0;
-}
-
 static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
                              struct ttm_placement *placement,
                              struct ttm_operation_ctx *ctx)