drm/amdgpu: revert context to stop engine before mode2 reset

For some hangs caused by slow tests, the engine cannot be stopped, which
may cause a resume failure after reset. In this case, force the engine
to halt by reverting the context addresses.

Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Victor Zhao
2022-06-24 11:59:21 +08:00
committed by Alex Deucher
parent bfaced6ee7
commit 72fadb1367
4 changed files with 40 additions and 0 deletions

View File

@@ -5039,6 +5039,7 @@ static void amdgpu_device_recheck_guilty_jobs(
/* set guilty */
drm_sched_increase_karma(s_job);
amdgpu_reset_prepare_hwcontext(adev, reset_context);
retry:
/* do hw reset */
if (amdgpu_sriov_vf(adev)) {

View File

@@ -37,6 +37,7 @@ struct amdgpu_gfxhub_funcs {
void (*utcl2_harvest)(struct amdgpu_device *adev);
void (*mode2_save_regs)(struct amdgpu_device *adev);
void (*mode2_restore_regs)(struct amdgpu_device *adev);
void (*halt)(struct amdgpu_device *adev);
};
struct amdgpu_gfxhub {

View File

@@ -646,6 +646,41 @@ static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL);
}
/**
 * gfxhub_v2_1_halt - force the GFX hub VM contexts into an unusable range
 *                    and wait for the EA to go idle
 * @adev: amdgpu device whose GFXHUB_0 registers are programmed
 *
 * Calls gfxhub_v2_1_set_fault_enable_default() with false, then for 15
 * consecutive context register sets (starting at the CONTEXT1 registers
 * and stepping by hub->ctx_addr_distance) writes all-ones to the page
 * table START address and zero to the page table END address, so that
 * start > end for each of them.
 * NOTE(review): per the commit message this is meant to force the engine
 * to halt before a mode2 reset -- confirm against the hardware docs.
 *
 * Afterwards GRBM_STATUS2 is polled until both EA busy bits are clear,
 * at most 1000 times with a 100 us delay between reads (about 100 ms).
 * A warning is logged only if the EA is still busy after the final read.
 */
static void gfxhub_v2_1_halt(struct amdgpu_device *adev)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
	const uint32_t ea_busy_mask = GRBM_STATUS2__EA_BUSY_MASK |
				      GRBM_STATUS2__EA_LINK_BUSY_MASK;
	uint32_t tmp;
	int time = 1000;
	int i;

	gfxhub_v2_1_set_fault_enable_default(adev, false);

	/*
	 * 15 register sets, offset from the CONTEXT1 registers
	 * (presumably VM contexts 1..15 -- TODO confirm).
	 */
	for (i = 0; i <= 14; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
				    i * hub->ctx_addr_distance, ~0);
		WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
				    i * hub->ctx_addr_distance, ~0);
		WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
				    i * hub->ctx_addr_distance, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
				    i * hub->ctx_addr_distance, 0);
	}

	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
	while ((tmp & ea_busy_mask) != 0 && time) {
		udelay(100);
		time--;
		tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
	}

	/*
	 * Decide on the last status read rather than on the counter: the
	 * busy bits may clear on the very last poll, in which case the
	 * counter is 0 but the wait actually succeeded.
	 */
	if ((tmp & ea_busy_mask) != 0)
		DRM_WARN("failed to wait for GRBM(EA) idle\n");
}
const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
.get_fb_location = gfxhub_v2_1_get_fb_location,
.get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset,
@@ -658,4 +693,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
.utcl2_harvest = gfxhub_v2_1_utcl2_harvest,
.mode2_save_regs = gfxhub_v2_1_save_regs,
.mode2_restore_regs = gfxhub_v2_1_restore_regs,
.halt = gfxhub_v2_1_halt,
};

View File

@@ -97,6 +97,8 @@ sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
if (!amdgpu_sriov_vf(adev)) {
if (adev->gfxhub.funcs->mode2_save_regs)
adev->gfxhub.funcs->mode2_save_regs(adev);
if (adev->gfxhub.funcs->halt)
adev->gfxhub.funcs->halt(adev);
r = sienna_cichlid_mode2_suspend_ip(adev);
}