drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
authorMichel Dänzer <michel.daenzer@amd.com>
Thu, 31 Jul 2014 09:43:49 +0000 (18:43 +0900)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 5 Aug 2014 12:53:45 +0000 (08:53 -0400)
This ensures the GPU sees all previous CPU writes to VRAM, which makes it
safe:

* For userspace to stream data from CPU to GPU via VRAM instead of GTT
* For IBs to be stored in VRAM instead of GTT
* For ring buffers to be stored in VRAM instead of GTT, if the HPD flush
  is performed via MMIO

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/r100.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_asic.h
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_ring.c

index 60c276538ba961d16ca7a13b971c491b44f8101c..afdfe04a92908fc61d1f92c62795e1a4fe0f9beb 100644 (file)
@@ -3890,8 +3890,6 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
-       /* HDP flush */
-       cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
 }
 
 /**
@@ -3920,8 +3918,6 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev,
        radeon_ring_write(ring, upper_32_bits(addr));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
-       /* HDP flush */
-       cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
 }
 
 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
index 5fd242795178ac8b594fc2f03d5b525fb123866a..04b5940b89234893f3f3042e5a00f30f97e899cf 100644 (file)
@@ -837,11 +837,7 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
        /* Wait until IDLE & CLEAN */
        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
-       radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-       radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
-                               RADEON_HDP_READ_BUFFER_INVALIDATE);
-       radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-       radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
+       r100_ring_hdp_flush(rdev, ring);
        /* Emit fence sequence & fire IRQ */
        radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
        radeon_ring_write(ring, fence->seq);
@@ -1060,6 +1056,20 @@ void r100_gfx_set_wptr(struct radeon_device *rdev,
        (void)RREG32(RADEON_CP_RB_WPTR);
 }
 
+/**
+ * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
+ * rdev: radeon device structure
+ * ring: ring buffer struct for emitting packets
+ */
+void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+       radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+       radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
+                               RADEON_HDP_READ_BUFFER_INVALIDATE);
+       radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+       radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
+}
+
 static void r100_cp_load_microcode(struct radeon_device *rdev)
 {
        const __be32 *fw_data;
index bcc98c1c135ec647a52a7e4606a1cd3bd5faa2a6..29731584b6ae7f59007c1819c9339c445904c14b 100644 (file)
@@ -1749,6 +1749,7 @@ struct radeon_asic_ring {
        /* command emmit functions */
        void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
        void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
+       void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring);
        bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
                               struct radeon_semaphore *semaphore, bool emit_wait);
        void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
index 5781fde5c1ce5672c3ecd1bb42eb7d253128d6d4..c49a01f92b4de94dc02901715294145637d0cb8f 100644 (file)
@@ -185,6 +185,7 @@ static struct radeon_asic_ring r100_gfx_ring = {
        .get_rptr = &r100_gfx_get_rptr,
        .get_wptr = &r100_gfx_get_wptr,
        .set_wptr = &r100_gfx_set_wptr,
+       .hdp_flush = &r100_ring_hdp_flush,
 };
 
 static struct radeon_asic r100_asic = {
@@ -331,6 +332,7 @@ static struct radeon_asic_ring r300_gfx_ring = {
        .get_rptr = &r100_gfx_get_rptr,
        .get_wptr = &r100_gfx_get_wptr,
        .set_wptr = &r100_gfx_set_wptr,
+       .hdp_flush = &r100_ring_hdp_flush,
 };
 
 static struct radeon_asic r300_asic = {
@@ -1987,7 +1989,7 @@ static struct radeon_asic ci_asic = {
        .resume = &cik_resume,
        .asic_reset = &cik_asic_reset,
        .vga_set_state = &r600_vga_set_state,
-       .mmio_hdp_flush = NULL,
+       .mmio_hdp_flush = &r600_mmio_hdp_flush,
        .gui_idle = &r600_gui_idle,
        .mc_wait_for_idle = &evergreen_mc_wait_for_idle,
        .get_xclk = &cik_get_xclk,
@@ -2091,7 +2093,7 @@ static struct radeon_asic kv_asic = {
        .resume = &cik_resume,
        .asic_reset = &cik_asic_reset,
        .vga_set_state = &r600_vga_set_state,
-       .mmio_hdp_flush = NULL,
+       .mmio_hdp_flush = &r600_mmio_hdp_flush,
        .gui_idle = &r600_gui_idle,
        .mc_wait_for_idle = &evergreen_mc_wait_for_idle,
        .get_xclk = &cik_get_xclk,
index b8826c6556859b9ce53204e472ea3c0635f565cb..3cf6be6666fc52d3c9c269181b3ed0715def76b2 100644 (file)
@@ -148,7 +148,8 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev,
                      struct radeon_ring *ring);
 void r100_gfx_set_wptr(struct radeon_device *rdev,
                       struct radeon_ring *ring);
-
+void r100_ring_hdp_flush(struct radeon_device *rdev,
+                        struct radeon_ring *ring);
 /*
  * r200,rv250,rs300,rv280
  */
index e9e361084249c7deddd0dd680ced4d95a845d31d..54aa293a8175a315ce397fefcbdc2c202871c586 100644 (file)
  *   2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
  *   2.39.0 - Add INFO query for number of active CUs
+ *   2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting
+ *            CS to GPU
  */
 #define KMS_DRIVER_MAJOR       2
-#define KMS_DRIVER_MINOR       39
+#define KMS_DRIVER_MINOR       40
 #define KMS_DRIVER_PATCHLEVEL  0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
index 9c86ac947275f034de3bf4ebeaf1a2df5ea098d5..5b4e0cf231a04d0a104868ed590f1324afe70be1 100644 (file)
@@ -183,11 +183,21 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig
  */
 void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
 {
+       /* If we are emitting the HDP flush via the ring buffer, we need to
+        * do it before padding.
+        */
+       if (rdev->asic->ring[ring->idx]->hdp_flush)
+               rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
        /* We pad to match fetch size */
        while (ring->wptr & ring->align_mask) {
                radeon_ring_write(ring, ring->nop);
        }
        mb();
+       /* If we are emitting the HDP flush via MMIO, we need to do it after
+        * all CPU writes to VRAM finished.
+        */
+       if (rdev->asic->mmio_hdp_flush)
+               rdev->asic->mmio_hdp_flush(rdev);
        radeon_ring_set_wptr(rdev, ring);
 }