Re: [RFC PATCH 4/4] drm/panthor: Use GPU_COMMAND.FLUSH_CACHES for cache maintenance
From: Steven Price
Date: Mon Dec 23 2024 - 12:05:21 EST
On 19/12/2024 17:05, Karunika Choo wrote:
> This patch adds support for performing cache maintenance operations via
> the GPU_CONTROL.GPU_COMMAND register instead of using FLUSH_PT or
> FLUSH_MEM commands from the AS_COMMAND register. This feature is enabled
> when the HW feature bit (PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH) is
> set.
>
> Signed-off-by: Karunika Choo <karunika.choo@xxxxxxx>
> ---
> drivers/gpu/drm/panthor/panthor_gpu.c | 2 +-
> drivers/gpu/drm/panthor/panthor_hw.c | 3 ++
> drivers/gpu/drm/panthor/panthor_hw.h | 4 +++
> drivers/gpu/drm/panthor/panthor_mmu.c | 46 +++++++++++++++++++++++++--
> 4 files changed, 52 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 9dadcea67a39..30dcb50409dd 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -276,7 +276,7 @@ int panthor_gpu_flush_caches(struct panthor_device *ptdev,
>
> if (!wait_event_timeout(ptdev->gpu->reqs_acked,
> !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED),
> - msecs_to_jiffies(100))) {
> + msecs_to_jiffies(1000))) {
This timeout change (100ms -> 1000ms) looks unrelated (or at least it's not mentioned in the commit message).
> spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
> if ((ptdev->gpu->pending_reqs &
> GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 &&
> diff --git a/drivers/gpu/drm/panthor/panthor_hw.c b/drivers/gpu/drm/panthor/panthor_hw.c
> index 0fb3adc093bc..3738f7fd106e 100644
> --- a/drivers/gpu/drm/panthor/panthor_hw.c
> +++ b/drivers/gpu/drm/panthor/panthor_hw.c
> @@ -20,6 +20,9 @@ static struct panthor_hw panthor_hw_devices[] = {
> [PANTHOR_ARCH_10_8] = {
> .arch_id = GPU_ARCH_ID_MAKE(10, 8, 0),
> .arch_mask = GPU_ARCH_ID_MAKE(0xFF, 0, 0),
> + .features = {
> + BIT(PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH)
> + },
It's a little odd to have a 'feature bit' to declare something that (so
far) every GPU supports. Do we need this feature bit?
> .map = {
> .mmu_as_base = ARCH_10_8_MMU_AS_BASE,
> .mmu_as_stride = ARCH_10_8_MMU_AS_STRIDE,
> diff --git a/drivers/gpu/drm/panthor/panthor_hw.h b/drivers/gpu/drm/panthor/panthor_hw.h
> index 3409083d09d0..69fa8f51a8c9 100644
> --- a/drivers/gpu/drm/panthor/panthor_hw.h
> +++ b/drivers/gpu/drm/panthor/panthor_hw.h
> @@ -13,6 +13,10 @@ struct panthor_device;
> * New feature flags will be added with support for newer GPU architectures.
> */
> enum panthor_hw_feature {
> + /** @PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH: Cache maintenance via GPU_CONTROL*/
> + PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH,
> +
> + /** @PANTHOR_HW_FEATURES_END: Number of HW feature bits */
> PANTHOR_HW_FEATURES_END
> };
>
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 8a190dd2e06c..91c420538e02 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -29,6 +29,7 @@
>
> #include "panthor_device.h"
> #include "panthor_gem.h"
> +#include "panthor_gpu.h"
> #include "panthor_heap.h"
> #include "panthor_hw.h"
> #include "panthor_mmu.h"
> @@ -533,12 +534,19 @@ static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd)
>
> /* write AS_COMMAND when MMU is ready to accept another command */
> status = wait_ready(ptdev, as_nr);
> - if (!status)
> - gpu_write(ptdev, MMU_AS(ptdev, as_nr) + AS_COMMAND, cmd);
> + if (status)
> + return status;
> +
> + gpu_write(ptdev, MMU_AS(ptdev, as_nr) + AS_COMMAND, cmd);
Please try to put simple cleanups like this in a separate patch -
there's no functional change here.
Steve
>
> return status;
> }
>
> +static int unlock_region(struct panthor_device *ptdev, u32 as_nr)
> +{
> + return write_cmd(ptdev, as_nr, AS_COMMAND_UNLOCK);
> +}
> +
> static void lock_region(struct panthor_device *ptdev, u32 as_nr,
> u64 region_start, u64 size)
> {
> @@ -573,6 +581,36 @@ static void lock_region(struct panthor_device *ptdev, u32 as_nr,
> write_cmd(ptdev, as_nr, AS_COMMAND_LOCK);
> }
>
> +static int mmu_hw_do_flush_on_gpu_ctrl(struct panthor_device *ptdev, int as_nr,
> + u32 op)
> +{
> + const u32 l2_flush = CACHE_CLEAN | CACHE_INV;
> + u32 lsc_flush = 0;
> + int ret;
> +
> + if (op == AS_COMMAND_FLUSH_MEM)
> + lsc_flush = CACHE_CLEAN | CACHE_INV;
> +
> + ret = wait_ready(ptdev, as_nr);
> + if (ret)
> + return ret;
> +
> + ret = panthor_gpu_flush_caches(ptdev, l2_flush, lsc_flush, 0);
> + if (ret)
> + return ret;
> +
> + /*
> + * Explicitly unlock the region as the AS is not unlocked
> + * automatically at the end of the operation, unlike FLUSH_MEM
> + * or FLUSH_PT.
> + */
> + ret = unlock_region(ptdev, as_nr);
> + if (ret)
> + return ret;
> +
> + return wait_ready(ptdev, as_nr);
> +}
> +
> static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
> u64 iova, u64 size, u32 op)
> {
> @@ -590,6 +628,10 @@ static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
> if (op != AS_COMMAND_UNLOCK)
> lock_region(ptdev, as_nr, iova, size);
>
> + if (panthor_hw_supports(ptdev, PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH) &&
> + (op == AS_COMMAND_FLUSH_PT || op == AS_COMMAND_FLUSH_MEM))
> + return mmu_hw_do_flush_on_gpu_ctrl(ptdev, as_nr, op);
> +
> /* Run the MMU operation */
> write_cmd(ptdev, as_nr, op);
>