drm/radeon/kms: Schedule host path read cache flush through the ring V2
R300 family will hard lockup if host path read cache flush is done through MMIO to HOST_PATH_CNTL. But scheduling same flush through ring seems harmless. This patch remove the hdp_flush callback and add a flush after each fence emission which means a flush after each IB schedule. Thus we should have same behavior without the hard lockup. Tested on R100,R200,R300,R400,R500,R600,R700 family. V2: Adjust fence counts in r600_blit_prepare_copy() Signed-off-by: Jerome Glisse <jglisse@redhat.com> Reviewed-by: Alex Deucher <alexdeucher@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
62cdc0c206
commit
cafe6609d6
13 changed files with 33 additions and 32 deletions
|
@ -356,6 +356,11 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
|
|||
/* Wait until IDLE & CLEAN */
|
||||
radeon_ring_write(rdev, PACKET0(0x1720, 0));
|
||||
radeon_ring_write(rdev, (1 << 16) | (1 << 17));
|
||||
radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
|
||||
radeon_ring_write(rdev, rdev->config.r100.hdp_cntl |
|
||||
RADEON_HDP_READ_BUFFER_INVALIDATE);
|
||||
radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
|
||||
radeon_ring_write(rdev, rdev->config.r100.hdp_cntl);
|
||||
/* Emit fence sequence & fire IRQ */
|
||||
radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
|
||||
radeon_ring_write(rdev, fence->seq);
|
||||
|
@ -1713,14 +1718,6 @@ void r100_gpu_init(struct radeon_device *rdev)
|
|||
r100_hdp_reset(rdev);
|
||||
}
|
||||
|
||||
void r100_hdp_flush(struct radeon_device *rdev)
|
||||
{
|
||||
u32 tmp;
|
||||
tmp = RREG32(RADEON_HOST_PATH_CNTL);
|
||||
tmp |= RADEON_HDP_READ_BUFFER_INVALIDATE;
|
||||
WREG32(RADEON_HOST_PATH_CNTL, tmp);
|
||||
}
|
||||
|
||||
void r100_hdp_reset(struct radeon_device *rdev)
|
||||
{
|
||||
uint32_t tmp;
|
||||
|
@ -3313,6 +3310,7 @@ static int r100_startup(struct radeon_device *rdev)
|
|||
}
|
||||
/* Enable IRQ */
|
||||
r100_irq_set(rdev);
|
||||
rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
|
||||
/* 1M ring buffer */
|
||||
r = r100_cp_init(rdev, 1024 * 1024);
|
||||
if (r) {
|
||||
|
|
|
@ -36,7 +36,15 @@
|
|||
#include "rv350d.h"
|
||||
#include "r300_reg_safe.h"
|
||||
|
||||
/* This files gather functions specifics to: r300,r350,rv350,rv370,rv380 */
|
||||
/* This files gather functions specifics to: r300,r350,rv350,rv370,rv380
|
||||
*
|
||||
* GPU Errata:
|
||||
* - HOST_PATH_CNTL: r300 family seems to dislike write to HOST_PATH_CNTL
|
||||
* using MMIO to flush host path read cache, this lead to HARDLOCKUP.
|
||||
* However, scheduling such write to the ring seems harmless, i suspect
|
||||
* the CP read collide with the flush somehow, or maybe the MC, hard to
|
||||
* tell. (Jerome Glisse)
|
||||
*/
|
||||
|
||||
/*
|
||||
* rv370,rv380 PCIE GART
|
||||
|
@ -178,6 +186,11 @@ void r300_fence_ring_emit(struct radeon_device *rdev,
|
|||
/* Wait until IDLE & CLEAN */
|
||||
radeon_ring_write(rdev, PACKET0(0x1720, 0));
|
||||
radeon_ring_write(rdev, (1 << 17) | (1 << 16) | (1 << 9));
|
||||
radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
|
||||
radeon_ring_write(rdev, rdev->config.r300.hdp_cntl |
|
||||
RADEON_HDP_READ_BUFFER_INVALIDATE);
|
||||
radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
|
||||
radeon_ring_write(rdev, rdev->config.r300.hdp_cntl);
|
||||
/* Emit fence sequence & fire IRQ */
|
||||
radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
|
||||
radeon_ring_write(rdev, fence->seq);
|
||||
|
@ -1258,6 +1271,7 @@ static int r300_startup(struct radeon_device *rdev)
|
|||
}
|
||||
/* Enable IRQ */
|
||||
r100_irq_set(rdev);
|
||||
rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
|
||||
/* 1M ring buffer */
|
||||
r = r100_cp_init(rdev, 1024 * 1024);
|
||||
if (r) {
|
||||
|
|
|
@ -219,6 +219,7 @@ static int r420_startup(struct radeon_device *rdev)
|
|||
r420_pipes_init(rdev);
|
||||
/* Enable IRQ */
|
||||
r100_irq_set(rdev);
|
||||
rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
|
||||
/* 1M ring buffer */
|
||||
r = r100_cp_init(rdev, 1024 * 1024);
|
||||
if (r) {
|
||||
|
|
|
@ -186,6 +186,7 @@ static int r520_startup(struct radeon_device *rdev)
|
|||
}
|
||||
/* Enable IRQ */
|
||||
rs600_irq_set(rdev);
|
||||
rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
|
||||
/* 1M ring buffer */
|
||||
r = r100_cp_init(rdev, 1024 * 1024);
|
||||
if (r) {
|
||||
|
|
|
@ -1388,11 +1388,6 @@ void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
|
|||
(void)RREG32(PCIE_PORT_DATA);
|
||||
}
|
||||
|
||||
void r600_hdp_flush(struct radeon_device *rdev)
|
||||
{
|
||||
WREG32(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
|
||||
}
|
||||
|
||||
/*
|
||||
* CP & Ring
|
||||
*/
|
||||
|
@ -1789,6 +1784,8 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
|
|||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
|
||||
radeon_ring_write(rdev, fence->seq);
|
||||
radeon_ring_write(rdev, PACKET0(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
|
||||
radeon_ring_write(rdev, 1);
|
||||
/* CP_INTERRUPT packet 3 no longer exists, use packet 0 */
|
||||
radeon_ring_write(rdev, PACKET0(CP_INT_STATUS, 0));
|
||||
radeon_ring_write(rdev, RB_INT_STAT);
|
||||
|
|
|
@ -577,9 +577,9 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
|
|||
ring_size = num_loops * dwords_per_loop;
|
||||
/* set default + shaders */
|
||||
ring_size += 40; /* shaders + def state */
|
||||
ring_size += 5; /* fence emit for VB IB */
|
||||
ring_size += 7; /* fence emit for VB IB */
|
||||
ring_size += 5; /* done copy */
|
||||
ring_size += 5; /* fence emit for done copy */
|
||||
ring_size += 7; /* fence emit for done copy */
|
||||
r = radeon_ring_lock(rdev, ring_size);
|
||||
WARN_ON(r);
|
||||
|
||||
|
|
|
@ -656,7 +656,6 @@ struct radeon_asic {
|
|||
uint32_t offset, uint32_t obj_size);
|
||||
int (*clear_surface_reg)(struct radeon_device *rdev, int reg);
|
||||
void (*bandwidth_update)(struct radeon_device *rdev);
|
||||
void (*hdp_flush)(struct radeon_device *rdev);
|
||||
void (*hpd_init)(struct radeon_device *rdev);
|
||||
void (*hpd_fini)(struct radeon_device *rdev);
|
||||
bool (*hpd_sense)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
|
||||
|
@ -669,12 +668,14 @@ struct radeon_asic {
|
|||
struct r100_asic {
|
||||
const unsigned *reg_safe_bm;
|
||||
unsigned reg_safe_bm_size;
|
||||
u32 hdp_cntl;
|
||||
};
|
||||
|
||||
struct r300_asic {
|
||||
const unsigned *reg_safe_bm;
|
||||
unsigned reg_safe_bm_size;
|
||||
u32 resync_scratch;
|
||||
u32 hdp_cntl;
|
||||
};
|
||||
|
||||
struct r600_asic {
|
||||
|
@ -1010,7 +1011,6 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
|
|||
#define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->set_surface_reg((rdev), (r), (f), (p), (o), (s)))
|
||||
#define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->clear_surface_reg((rdev), (r)))
|
||||
#define radeon_bandwidth_update(rdev) (rdev)->asic->bandwidth_update((rdev))
|
||||
#define radeon_hdp_flush(rdev) (rdev)->asic->hdp_flush((rdev))
|
||||
#define radeon_hpd_init(rdev) (rdev)->asic->hpd_init((rdev))
|
||||
#define radeon_hpd_fini(rdev) (rdev)->asic->hpd_fini((rdev))
|
||||
#define radeon_hpd_sense(rdev, hpd) (rdev)->asic->hpd_sense((rdev), (hpd))
|
||||
|
|
|
@ -77,7 +77,6 @@ int r100_clear_surface_reg(struct radeon_device *rdev, int reg);
|
|||
void r100_bandwidth_update(struct radeon_device *rdev);
|
||||
void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
|
||||
int r100_ring_test(struct radeon_device *rdev);
|
||||
void r100_hdp_flush(struct radeon_device *rdev);
|
||||
void r100_hpd_init(struct radeon_device *rdev);
|
||||
void r100_hpd_fini(struct radeon_device *rdev);
|
||||
bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
|
||||
|
@ -114,7 +113,6 @@ static struct radeon_asic r100_asic = {
|
|||
.set_surface_reg = r100_set_surface_reg,
|
||||
.clear_surface_reg = r100_clear_surface_reg,
|
||||
.bandwidth_update = &r100_bandwidth_update,
|
||||
.hdp_flush = &r100_hdp_flush,
|
||||
.hpd_init = &r100_hpd_init,
|
||||
.hpd_fini = &r100_hpd_fini,
|
||||
.hpd_sense = &r100_hpd_sense,
|
||||
|
@ -174,7 +172,6 @@ static struct radeon_asic r300_asic = {
|
|||
.set_surface_reg = r100_set_surface_reg,
|
||||
.clear_surface_reg = r100_clear_surface_reg,
|
||||
.bandwidth_update = &r100_bandwidth_update,
|
||||
.hdp_flush = &r100_hdp_flush,
|
||||
.hpd_init = &r100_hpd_init,
|
||||
.hpd_fini = &r100_hpd_fini,
|
||||
.hpd_sense = &r100_hpd_sense,
|
||||
|
@ -218,7 +215,6 @@ static struct radeon_asic r420_asic = {
|
|||
.set_surface_reg = r100_set_surface_reg,
|
||||
.clear_surface_reg = r100_clear_surface_reg,
|
||||
.bandwidth_update = &r100_bandwidth_update,
|
||||
.hdp_flush = &r100_hdp_flush,
|
||||
.hpd_init = &r100_hpd_init,
|
||||
.hpd_fini = &r100_hpd_fini,
|
||||
.hpd_sense = &r100_hpd_sense,
|
||||
|
@ -267,7 +263,6 @@ static struct radeon_asic rs400_asic = {
|
|||
.set_surface_reg = r100_set_surface_reg,
|
||||
.clear_surface_reg = r100_clear_surface_reg,
|
||||
.bandwidth_update = &r100_bandwidth_update,
|
||||
.hdp_flush = &r100_hdp_flush,
|
||||
.hpd_init = &r100_hpd_init,
|
||||
.hpd_fini = &r100_hpd_fini,
|
||||
.hpd_sense = &r100_hpd_sense,
|
||||
|
@ -324,7 +319,6 @@ static struct radeon_asic rs600_asic = {
|
|||
.set_pcie_lanes = NULL,
|
||||
.set_clock_gating = &radeon_atom_set_clock_gating,
|
||||
.bandwidth_update = &rs600_bandwidth_update,
|
||||
.hdp_flush = &r100_hdp_flush,
|
||||
.hpd_init = &rs600_hpd_init,
|
||||
.hpd_fini = &rs600_hpd_fini,
|
||||
.hpd_sense = &rs600_hpd_sense,
|
||||
|
@ -372,7 +366,6 @@ static struct radeon_asic rs690_asic = {
|
|||
.set_surface_reg = r100_set_surface_reg,
|
||||
.clear_surface_reg = r100_clear_surface_reg,
|
||||
.bandwidth_update = &rs690_bandwidth_update,
|
||||
.hdp_flush = &r100_hdp_flush,
|
||||
.hpd_init = &rs600_hpd_init,
|
||||
.hpd_fini = &rs600_hpd_fini,
|
||||
.hpd_sense = &rs600_hpd_sense,
|
||||
|
@ -424,7 +417,6 @@ static struct radeon_asic rv515_asic = {
|
|||
.set_surface_reg = r100_set_surface_reg,
|
||||
.clear_surface_reg = r100_clear_surface_reg,
|
||||
.bandwidth_update = &rv515_bandwidth_update,
|
||||
.hdp_flush = &r100_hdp_flush,
|
||||
.hpd_init = &rs600_hpd_init,
|
||||
.hpd_fini = &rs600_hpd_fini,
|
||||
.hpd_sense = &rs600_hpd_sense,
|
||||
|
@ -467,7 +459,6 @@ static struct radeon_asic r520_asic = {
|
|||
.set_surface_reg = r100_set_surface_reg,
|
||||
.clear_surface_reg = r100_clear_surface_reg,
|
||||
.bandwidth_update = &rv515_bandwidth_update,
|
||||
.hdp_flush = &r100_hdp_flush,
|
||||
.hpd_init = &rs600_hpd_init,
|
||||
.hpd_fini = &rs600_hpd_fini,
|
||||
.hpd_sense = &rs600_hpd_sense,
|
||||
|
@ -508,7 +499,6 @@ int r600_ring_test(struct radeon_device *rdev);
|
|||
int r600_copy_blit(struct radeon_device *rdev,
|
||||
uint64_t src_offset, uint64_t dst_offset,
|
||||
unsigned num_pages, struct radeon_fence *fence);
|
||||
void r600_hdp_flush(struct radeon_device *rdev);
|
||||
void r600_hpd_init(struct radeon_device *rdev);
|
||||
void r600_hpd_fini(struct radeon_device *rdev);
|
||||
bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
|
||||
|
@ -544,7 +534,6 @@ static struct radeon_asic r600_asic = {
|
|||
.set_surface_reg = r600_set_surface_reg,
|
||||
.clear_surface_reg = r600_clear_surface_reg,
|
||||
.bandwidth_update = &rv515_bandwidth_update,
|
||||
.hdp_flush = &r600_hdp_flush,
|
||||
.hpd_init = &r600_hpd_init,
|
||||
.hpd_fini = &r600_hpd_fini,
|
||||
.hpd_sense = &r600_hpd_sense,
|
||||
|
@ -589,7 +578,6 @@ static struct radeon_asic rv770_asic = {
|
|||
.set_surface_reg = r600_set_surface_reg,
|
||||
.clear_surface_reg = r600_clear_surface_reg,
|
||||
.bandwidth_update = &rv515_bandwidth_update,
|
||||
.hdp_flush = &r600_hdp_flush,
|
||||
.hpd_init = &r600_hpd_init,
|
||||
.hpd_fini = &r600_hpd_fini,
|
||||
.hpd_sense = &r600_hpd_sense,
|
||||
|
|
|
@ -131,7 +131,6 @@ int radeon_gem_set_domain(struct drm_gem_object *gobj,
|
|||
printk(KERN_ERR "Failed to wait for object !\n");
|
||||
return r;
|
||||
}
|
||||
radeon_hdp_flush(robj->rdev);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -312,7 +311,6 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
|
|||
mutex_lock(&dev->struct_mutex);
|
||||
drm_gem_object_unreference(gobj);
|
||||
mutex_unlock(&dev->struct_mutex);
|
||||
radeon_hdp_flush(robj->rdev);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
|
|
@ -396,6 +396,7 @@ static int rs400_startup(struct radeon_device *rdev)
|
|||
return r;
|
||||
/* Enable IRQ */
|
||||
r100_irq_set(rdev);
|
||||
rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
|
||||
/* 1M ring buffer */
|
||||
r = r100_cp_init(rdev, 1024 * 1024);
|
||||
if (r) {
|
||||
|
|
|
@ -554,6 +554,7 @@ static int rs600_startup(struct radeon_device *rdev)
|
|||
return r;
|
||||
/* Enable IRQ */
|
||||
rs600_irq_set(rdev);
|
||||
rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
|
||||
/* 1M ring buffer */
|
||||
r = r100_cp_init(rdev, 1024 * 1024);
|
||||
if (r) {
|
||||
|
|
|
@ -626,6 +626,7 @@ static int rs690_startup(struct radeon_device *rdev)
|
|||
return r;
|
||||
/* Enable IRQ */
|
||||
rs600_irq_set(rdev);
|
||||
rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
|
||||
/* 1M ring buffer */
|
||||
r = r100_cp_init(rdev, 1024 * 1024);
|
||||
if (r) {
|
||||
|
|
|
@ -479,6 +479,7 @@ static int rv515_startup(struct radeon_device *rdev)
|
|||
}
|
||||
/* Enable IRQ */
|
||||
rs600_irq_set(rdev);
|
||||
rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
|
||||
/* 1M ring buffer */
|
||||
r = r100_cp_init(rdev, 1024 * 1024);
|
||||
if (r) {
|
||||
|
|
Loading…
Reference in a new issue