Re: [PATCH v4 2/2] drm/xe: Remove devcoredump during driver release

From: Rodrigo Vivi
Date: Tue Apr 09 2024 - 18:22:05 EST


On Tue, Apr 09, 2024 at 01:02:06PM -0700, José Roberto de Souza wrote:
> This will remove the devcoredump from the file system and free its
> resources during driver unload.
>
> This fixes driver unload after a GPU hang has happened; otherwise it
> would report that the Xe KMD is still in use and would leave the
> kernel in a state where the Xe KMD can't be unloaded without a reboot.
>
> Cc: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
> Cc: Jonathan Cavitt <jonathan.cavitt@xxxxxxxxx>
> Acked-by: Jonathan Cavitt <jonathan.cavitt@xxxxxxxxx>
> Signed-off-by: José Roberto de Souza <jose.souza@xxxxxxxxx>

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>

> ---
> drivers/gpu/drm/xe/xe_devcoredump.c | 13 ++++++++++++-
> drivers/gpu/drm/xe/xe_devcoredump.h | 6 ++++++
> drivers/gpu/drm/xe/xe_device.c | 4 ++++
> 3 files changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
> index 283ca7518aff2..3d7980232be1c 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -9,6 +9,8 @@
> #include <linux/devcoredump.h>
> #include <generated/utsrelease.h>
>
> +#include <drm/drm_managed.h>
> +
> #include "xe_device.h"
> #include "xe_exec_queue.h"
> #include "xe_force_wake.h"
> @@ -235,5 +237,14 @@ void xe_devcoredump(struct xe_sched_job *job)
> dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
> xe_devcoredump_read, xe_devcoredump_free);
> }
> -#endif
>
> +static void xe_driver_devcoredump_fini(struct drm_device *drm, void *arg)
> +{
> + dev_coredump_put(drm->dev);
> +}
> +
> +int xe_devcoredump_init(struct xe_device *xe)
> +{
> + return drmm_add_action_or_reset(&xe->drm, xe_driver_devcoredump_fini, xe);
> +}
> +#endif
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
> index df8671f0b5eb2..e2fa65ce09322 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.h
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.h
> @@ -11,10 +11,16 @@ struct xe_sched_job;
>
> #ifdef CONFIG_DEV_COREDUMP
> void xe_devcoredump(struct xe_sched_job *job);
> +int xe_devcoredump_init(struct xe_device *xe);
> #else
> static inline void xe_devcoredump(struct xe_sched_job *job)
> {
> }
> +
> +static inline int xe_devcoredump_init(struct xe_device *xe)
> +{
> + return 0;
> +}
> #endif
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 9083f5e02dd9e..ce27d0d1bdb34 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -20,6 +20,7 @@
> #include "regs/xe_regs.h"
> #include "xe_bo.h"
> #include "xe_debugfs.h"
> +#include "xe_devcoredump.h"
> #include "xe_dma_buf.h"
> #include "xe_drm_client.h"
> #include "xe_drv.h"
> @@ -513,6 +514,9 @@ int xe_device_probe(struct xe_device *xe)
> return err;
> }
>
> + err = xe_devcoredump_init(xe);
> + if (err)
> + return err;
> err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
> if (err)
> return err;
> --
> 2.44.0
>