Re: [PATCH v5 2/7] drm/amdgpu: only use working sdma schedulers for ttm

From: Christian König

Date: Tue Apr 07 2026 - 04:12:23 EST


On 4/3/26 10:35, Pierre-Eric Pelloux-Prayer wrote:
> It's possible that some sdma instances aren't working, so we shouldn't try
> to use them from TTM. To achieve this, delay the call to
> amdgpu_sdma_set_buffer_funcs_scheds until after the rings have been tested,
> and then use the 'ready' property to decide whether a sched should be used.
>
> Note that this currently has little effect, because if the ring helper
> fails for any ring, the whole sdma block init fails.
>
> ---
> v5: check buffer_funcs_enabled from amdgpu_ttm_access_memory_sdma
> ---
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx>

Reviewed-by: Christian König <christian.koenig@xxxxxxx>

> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 23 ++++++++++++++++-------
> drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 10 ++++++++--
> drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 5 +++--
> drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 5 +++--
> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 9 +++++++--
> drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 9 +++++++--
> drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c | 9 +++++++--
> drivers/gpu/drm/amd/amdgpu/si_dma.c | 9 +++++++--
> 12 files changed, 68 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4ba7321b75e3..bd32113292ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1560,7 +1560,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
> if (!adev->mman.sdma_access_ptr)
> return -EACCES;
>
> - if (!drm_dev_enter(adev_to_drm(adev), &idx))
> + if (!adev->mman.buffer_funcs_enabled || !drm_dev_enter(adev_to_drm(adev), &idx))
> return -ENODEV;
>
> if (write)
> @@ -2351,8 +2351,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
> if (enable) {
> struct drm_gpu_scheduler *sched;
>
> - if (!adev->mman.num_buffer_funcs_scheds ||
> - !adev->mman.buffer_funcs_scheds[0]->ready) {
> + if (!adev->mman.num_buffer_funcs_scheds) {
> dev_warn(adev->dev, "Not enabling DMA transfers for in kernel use");
> return;
> }
> @@ -2734,20 +2733,30 @@ void amdgpu_sdma_set_buffer_funcs_scheds(struct amdgpu_device *adev,
> {
> struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
> struct drm_gpu_scheduler *sched;
> - int i;
> + int i, n;
>
> adev->mman.buffer_funcs = buffer_funcs;
>
> - for (i = 0; i < adev->sdma.num_instances; i++) {
> + for (i = 0, n = 0; i < adev->sdma.num_instances; i++) {
> if (adev->sdma.has_page_queue)
> sched = &adev->sdma.instance[i].page.sched;
> else
> sched = &adev->sdma.instance[i].ring.sched;
> - adev->mman.buffer_funcs_scheds[i] = sched;
> +
> + if (!sched->ready)
> + continue;
> +
> + adev->mman.buffer_funcs_scheds[n++] = sched;
> + }
> +
> + if (n == 0) {
> + adev->mman.num_buffer_funcs_scheds = 0;
> + drm_warn(&adev->ddev, "No working sdma ring available\n");
> + return;
> }
>
> adev->mman.num_buffer_funcs_scheds = hub->sdma_invalidation_workaround ?
> - 1 : adev->sdma.num_instances;
> + 1 : n;
> }
>
> #if defined(CONFIG_DEBUG_FS)
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index 26276dcfd458..120da838ac28 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -939,7 +939,6 @@ static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block)
>
> cik_sdma_set_ring_funcs(adev);
> cik_sdma_set_irq_funcs(adev);
> - cik_sdma_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &cik_sdma_vm_pte_funcs);
>
> return 0;
> @@ -1000,8 +999,15 @@ static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block)
> static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> + int r;
>
> - return cik_sdma_start(adev);
> + r = cik_sdma_start(adev);
> + if (r)
> + return r;
> +
> + cik_sdma_set_buffer_funcs(adev);
> +
> + return 0;
> }
>
> static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index c6a059ca59e5..93ec52c1f367 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -828,7 +828,6 @@ static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block)
> return r;
>
> sdma_v2_4_set_ring_funcs(adev);
> - sdma_v2_4_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v2_4_vm_pte_funcs);
> sdma_v2_4_set_irq_funcs(adev);
>
> @@ -898,7 +897,9 @@ static int sdma_v2_4_hw_init(struct amdgpu_ip_block *ip_block)
> if (r)
> return r;
>
> - return r;
> + sdma_v2_4_set_buffer_funcs(adev);
> +
> + return 0;
> }
>
> static int sdma_v2_4_hw_fini(struct amdgpu_ip_block *ip_block)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index cb516a25210d..3fde9be74690 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -1108,7 +1108,6 @@ static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block)
> return r;
>
> sdma_v3_0_set_ring_funcs(adev);
> - sdma_v3_0_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v3_0_vm_pte_funcs);
> sdma_v3_0_set_irq_funcs(adev);
>
> @@ -1184,7 +1183,9 @@ static int sdma_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
> if (r)
> return r;
>
> - return r;
> + sdma_v3_0_set_buffer_funcs(adev);
> +
> + return 0;
> }
>
> static int sdma_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index d56be26f216b..8a2a4e61867e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1775,7 +1775,6 @@ static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block)
> adev->sdma.has_page_queue = true;
>
> sdma_v4_0_set_ring_funcs(adev);
> - sdma_v4_0_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v4_0_vm_pte_funcs);
> sdma_v4_0_set_irq_funcs(adev);
> sdma_v4_0_set_ras_funcs(adev);
> @@ -1961,6 +1960,7 @@ static int sdma_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
> static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> + int r;
>
> if (adev->flags & AMD_IS_APU)
> amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0);
> @@ -1968,7 +1968,12 @@ static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
> if (!amdgpu_sriov_vf(adev))
> sdma_v4_0_init_golden_registers(adev);
>
> - return sdma_v4_0_start(adev);
> + r = sdma_v4_0_start(adev);
> + if (r)
> + return r;
> + sdma_v4_0_set_buffer_funcs(adev);
> +
> + return 0;
> }
>
> static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index 67e9697301b4..88428b88e00f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -1368,7 +1368,6 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
> adev->sdma.has_page_queue = true;
>
> sdma_v4_4_2_set_ring_funcs(adev);
> - sdma_v4_4_2_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v4_4_2_vm_pte_funcs);
> sdma_v4_4_2_set_irq_funcs(adev);
> sdma_v4_4_2_set_ras_funcs(adev);
> @@ -1568,8 +1567,11 @@ static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block)
> sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
>
> r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
> + if (r)
> + return r;
> + sdma_v4_4_2_set_buffer_funcs(adev);
>
> - return r;
> + return 0;
> }
>
> static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 86f5eb784d57..fa02907217e0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1373,7 +1373,6 @@ static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block)
> return r;
>
> sdma_v5_0_set_ring_funcs(adev);
> - sdma_v5_0_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v5_0_vm_pte_funcs);
> sdma_v5_0_set_irq_funcs(adev);
> sdma_v5_0_set_mqd_funcs(adev);
> @@ -1472,8 +1471,11 @@ static int sdma_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
> sdma_v5_0_init_golden_registers(adev);
>
> r = sdma_v5_0_start(adev);
> + if (r)
> + return r;
> + sdma_v5_0_set_buffer_funcs(adev);
>
> - return r;
> + return 0;
> }
>
> static int sdma_v5_0_hw_fini(struct amdgpu_ip_block *ip_block)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 3fec838374b2..f6ecbc524c9b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -1264,7 +1264,6 @@ static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block)
> return r;
>
> sdma_v5_2_set_ring_funcs(adev);
> - sdma_v5_2_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v5_2_vm_pte_funcs);
> sdma_v5_2_set_irq_funcs(adev);
> sdma_v5_2_set_mqd_funcs(adev);
> @@ -1385,8 +1384,14 @@ static int sdma_v5_2_sw_fini(struct amdgpu_ip_block *ip_block)
> static int sdma_v5_2_hw_init(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> + int r;
>
> - return sdma_v5_2_start(adev);
> + r = sdma_v5_2_start(adev);
> + if (r)
> + return r;
> + sdma_v5_2_set_buffer_funcs(adev);
> +
> + return 0;
> }
>
> static int sdma_v5_2_hw_fini(struct amdgpu_ip_block *ip_block)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index 064508cecd11..ee83d3e5978e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -1313,7 +1313,6 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
> return r;
>
> sdma_v6_0_set_ring_funcs(adev);
> - sdma_v6_0_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v6_0_vm_pte_funcs);
> sdma_v6_0_set_irq_funcs(adev);
> sdma_v6_0_set_mqd_funcs(adev);
> @@ -1477,6 +1476,7 @@ static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
> r = sdma_v6_0_start(adev);
> if (r)
> return r;
> + sdma_v6_0_set_buffer_funcs(adev);
>
> return sdma_v6_0_set_userq_trap_interrupts(adev, true);
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> index 60447729271e..c8343981c596 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> @@ -1299,7 +1299,6 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block)
> }
>
> sdma_v7_0_set_ring_funcs(adev);
> - sdma_v7_0_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v7_0_vm_pte_funcs);
> sdma_v7_0_set_irq_funcs(adev);
> sdma_v7_0_set_mqd_funcs(adev);
> @@ -1432,6 +1431,7 @@ static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
> r = sdma_v7_0_start(adev);
> if (r)
> return r;
> + sdma_v7_0_set_buffer_funcs(adev);
>
> return sdma_v7_0_set_userq_trap_interrupts(adev, true);
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
> index c3428d2731dc..b1ca95c2623b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
> @@ -1275,7 +1275,6 @@ static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block)
> }
>
> sdma_v7_1_set_ring_funcs(adev);
> - sdma_v7_1_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v7_1_vm_pte_funcs);
> sdma_v7_1_set_irq_funcs(adev);
> sdma_v7_1_set_mqd_funcs(adev);
> @@ -1374,10 +1373,16 @@ static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> uint32_t inst_mask;
> + int r;
>
> inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
>
> - return sdma_v7_1_inst_start(adev, inst_mask);
> + r = sdma_v7_1_inst_start(adev, inst_mask);
> + if (r)
> + return r;
> + sdma_v7_1_set_buffer_funcs(adev);
> +
> + return 0;
> }
>
> static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block)
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> index 155067c20a0e..549708075eb4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> @@ -487,7 +487,6 @@ static int si_dma_early_init(struct amdgpu_ip_block *ip_block)
> adev->sdma.num_instances = SDMA_MAX_INSTANCE;
>
> si_dma_set_ring_funcs(adev);
> - si_dma_set_buffer_funcs(adev);
> amdgpu_sdma_set_vm_pte_scheds(adev, &si_dma_vm_pte_funcs);
> si_dma_set_irq_funcs(adev);
>
> @@ -543,8 +542,14 @@ static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block)
> static int si_dma_hw_init(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> + int r;
>
> - return si_dma_start(adev);
> + r = si_dma_start(adev);
> + if (r)
> + return r;
> + si_dma_set_buffer_funcs(adev);
> +
> + return 0;
> }
>
> static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block)