mirror of
https://github.com/hardkernel/linux.git
synced 2026-03-25 20:10:23 +09:00
drm/amdgpu: release gpu full access after "amdgpu_device_ip_late_init"
[ Upstream commit 38eecbe086 ]
[WHY]
Function "amdgpu_irq_update()", called by "amdgpu_device_ip_late_init()", runs in atomic context.
We shouldn't access registers through KIQ since "msleep()" may be called in "amdgpu_kiq_rreg()".
[HOW]
Move the call to "amdgpu_virt_release_full_gpu()" after the call to "amdgpu_device_ip_late_init()",
to ensure that registers are accessed through RLCG instead of KIQ.
Call Trace:
<TASK>
show_stack+0x52/0x69
dump_stack_lvl+0x49/0x6d
dump_stack+0x10/0x18
__schedule_bug.cold+0x4f/0x6b
__schedule+0x473/0x5d0
? __wake_up_klogd.part.0+0x40/0x70
? vprintk_emit+0xbe/0x1f0
schedule+0x68/0x110
schedule_timeout+0x87/0x160
? timer_migration_handler+0xa0/0xa0
msleep+0x2d/0x50
amdgpu_kiq_rreg+0x18d/0x1f0 [amdgpu]
amdgpu_device_rreg.part.0+0x59/0xd0 [amdgpu]
amdgpu_device_rreg+0x3a/0x50 [amdgpu]
amdgpu_sriov_rreg+0x3c/0xb0 [amdgpu]
gfx_v10_0_set_gfx_eop_interrupt_state.constprop.0+0x16c/0x190 [amdgpu]
gfx_v10_0_set_eop_interrupt_state+0xa5/0xb0 [amdgpu]
amdgpu_irq_update+0x53/0x80 [amdgpu]
amdgpu_irq_get+0x7c/0xb0 [amdgpu]
amdgpu_fence_driver_hw_init+0x58/0x90 [amdgpu]
amdgpu_device_init.cold+0x16b7/0x2022 [amdgpu]
Signed-off-by: Chong Li <chongli2@amd.com>
Reviewed-by: JingWen.Chen2@amd.com
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
8ac106aade
commit
6a7bf00389
@@ -2509,8 +2509,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
||||
amdgpu_fru_get_product_info(adev);
|
||||
|
||||
init_failed:
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
amdgpu_virt_release_full_gpu(adev, true);
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -3755,18 +3753,6 @@ fence_driver_init:
|
||||
|
||||
r = amdgpu_device_ip_init(adev);
|
||||
if (r) {
|
||||
/* failed in exclusive mode due to timeout */
|
||||
if (amdgpu_sriov_vf(adev) &&
|
||||
!amdgpu_sriov_runtime(adev) &&
|
||||
amdgpu_virt_mmio_blocked(adev) &&
|
||||
!amdgpu_virt_wait_reset(adev)) {
|
||||
dev_err(adev->dev, "VF exclusive mode timeout\n");
|
||||
/* Don't send request since VF is inactive. */
|
||||
adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
|
||||
adev->virt.ops = NULL;
|
||||
r = -EAGAIN;
|
||||
goto release_ras_con;
|
||||
}
|
||||
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
|
||||
goto release_ras_con;
|
||||
@@ -3845,8 +3831,10 @@ fence_driver_init:
|
||||
msecs_to_jiffies(AMDGPU_RESUME_MS));
|
||||
}
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
amdgpu_virt_release_full_gpu(adev, true);
|
||||
flush_delayed_work(&adev->delayed_init_work);
|
||||
}
|
||||
|
||||
r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
|
||||
if (r)
|
||||
@@ -3881,6 +3869,20 @@ fence_driver_init:
|
||||
return 0;
|
||||
|
||||
release_ras_con:
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
amdgpu_virt_release_full_gpu(adev, true);
|
||||
|
||||
/* failed in exclusive mode due to timeout */
|
||||
if (amdgpu_sriov_vf(adev) &&
|
||||
!amdgpu_sriov_runtime(adev) &&
|
||||
amdgpu_virt_mmio_blocked(adev) &&
|
||||
!amdgpu_virt_wait_reset(adev)) {
|
||||
dev_err(adev->dev, "VF exclusive mode timeout\n");
|
||||
/* Don't send request since VF is inactive. */
|
||||
adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
|
||||
adev->virt.ops = NULL;
|
||||
r = -EAGAIN;
|
||||
}
|
||||
amdgpu_release_ras_context(adev);
|
||||
|
||||
failed:
|
||||
|
||||
Reference in New Issue
Block a user