[PATCH 5.15 24/83] drm/amdgpu: Add amdgpu suspend-resume code path under SRIOV

From: Greg Kroah-Hartman
Date: Mon Oct 03 2022 - 04:50:44 EST


From: Bokun Zhang <Bokun.Zhang@xxxxxxx>

commit 3b7329cf5a767c1be38352d43066012e220ad43c upstream.

- Under SRIOV, we need to send REQ_GPU_FINI to the hypervisor
at suspend time. Furthermore, we cannot request a mode 1
reset under SRIOV as a VF. Therefore, we skip it as it is
called in the suspend_noirq() function.

- In the resume code path, we need to send REQ_GPU_INIT to the
hypervisor and also resume the PSP IP block under SRIOV.

Signed-off-by: Bokun Zhang <Bokun.Zhang@xxxxxxx>
Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 27 ++++++++++++++++++++++++++-
2 files changed, 30 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1056,6 +1056,10 @@ bool amdgpu_acpi_should_gpu_reset(struct
{
if (adev->flags & AMD_IS_APU)
return false;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
}

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3137,7 +3137,8 @@ static int amdgpu_device_ip_resume_phase
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {

r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
@@ -4001,12 +4002,20 @@ static void amdgpu_device_evict_resource
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ int r = 0;

if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;

adev->in_suspend = true;

+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ r = amdgpu_virt_request_full_gpu(adev, false);
+ if (r)
+ return r;
+ }
+
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
DRM_WARN("smart shift update failed\n");

@@ -4035,6 +4044,9 @@ int amdgpu_device_suspend(struct drm_dev
*/
amdgpu_device_evict_resources(adev);

+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, false);
+
return 0;
}

@@ -4053,6 +4065,12 @@ int amdgpu_device_resume(struct drm_devi
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;

+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return r;
+ }
+
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;

@@ -4067,6 +4085,13 @@ int amdgpu_device_resume(struct drm_devi
}

r = amdgpu_device_ip_resume(adev);
+
+ /* no matter what r is, always need to properly release full GPU */
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_data_exchange(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
+ }
+
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
return r;