mirror of
https://github.com/hardkernel/linux.git
synced 2026-03-24 19:40:21 +09:00
drm/amdgpu: Fixed psp fence and memory issues when removing amdgpu device
V3:
Fixed psp fence and memory issues for the asic
using smu v13_0_2 when removing amdgpu device.
[Why]:
1. psp_suspend->psp_free_shared_bufs->
psp_ta_free_shared_buf->
amdgpu_bo_free_kernel->
...->amdgpu_bo_release_notify->
amdgpu_fill_buffer
psp frees the vram memory it uses when psp_suspend
is called. But for the asic using smu v13_0_2, because
psp_suspend is called before adev->shutdown is set to
true when removing the first hive device, amdgpu_fill_buffer
will be called, which will cause fence issues when evicting
all vram resources in amdgpu_vram_mgr_fini.
2. Since psp_hw_fini is not called after psp_suspend, and
psp_suspend only calls psp_ring_stop, the psp ring memory
is not released when the amdgpu device is removed.
[How]:
1. Set adev->shutdown to true before calling
amdgpu_device_gpu_recover, so that amdgpu_fill_buffer is not
called when psp_suspend is called.
2. Free psp ring memory in psp_sw_fini.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
f5c7e77970
commit
83d29a5f8a
@@ -5191,8 +5191,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
*/
|
||||
INIT_LIST_HEAD(&device_list);
|
||||
if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
list_add_tail(&tmp_adev->reset_list, &device_list);
|
||||
if (gpu_reset_for_dev_remove && adev->shutdown)
|
||||
tmp_adev->shutdown = true;
|
||||
}
|
||||
if (!list_is_first(&adev->reset_list, &device_list))
|
||||
list_rotate_to_front(&adev->reset_list, &device_list);
|
||||
device_list_handle = &device_list;
|
||||
|
||||
@@ -2207,6 +2207,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
|
||||
if (need_to_reset_gpu) {
|
||||
struct amdgpu_reset_context reset_context;
|
||||
|
||||
adev->shutdown = true;
|
||||
memset(&reset_context, 0, sizeof(reset_context));
|
||||
reset_context.method = AMD_RESET_METHOD_NONE;
|
||||
reset_context.reset_req_dev = adev;
|
||||
|
||||
@@ -511,6 +511,11 @@ static int psp_sw_fini(void *handle)
|
||||
kfree(cmd);
|
||||
cmd = NULL;
|
||||
|
||||
if (psp->km_ring.ring_mem)
|
||||
amdgpu_bo_free_kernel(&adev->firmware.rbuf,
|
||||
&psp->km_ring.ring_mem_mc_addr,
|
||||
(void **)&psp->km_ring.ring_mem);
|
||||
|
||||
amdgpu_bo_free_kernel(&psp->fw_pri_bo,
|
||||
&psp->fw_pri_mc_addr, &psp->fw_pri_buf);
|
||||
amdgpu_bo_free_kernel(&psp->fence_buf_bo,
|
||||
|
||||
Reference in New Issue
Block a user