Re: [PATCH v4 2/4] remoteproc: Introduce "panic" callback in ops

From: Mathieu Poirier
Date: Mon Mar 23 2020 - 18:29:24 EST


On Mon, Mar 09, 2020 at 11:38:15PM -0700, Bjorn Andersson wrote:
> Introduce generic support for handling kernel panics in remoteproc
> drivers, in order to allow operations needed for aiding in post mortem
> system debugging, such as flushing caches etc.
>
> The function can return a number of milliseconds needed by the remote to
> "settle" and the core will wait the longest returned duration before
> returning from the panic handler.
>
> Signed-off-by: Bjorn Andersson <bjorn.andersson@xxxxxxxxxx>
> ---
>
> Change since v3:
> - Migrate from mutex_trylock() to using RCU
> - Turned the timeout to unsigned long
>
> drivers/remoteproc/remoteproc_core.c | 44 ++++++++++++++++++++++++++++
> include/linux/remoteproc.h | 3 ++
> 2 files changed, 47 insertions(+)
>
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index f0a77c30c6b1..2024a98930bf 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -16,6 +16,7 @@
>
> #define pr_fmt(fmt) "%s: " fmt, __func__
>
> +#include <linux/delay.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> #include <linux/device.h>
> @@ -43,6 +44,7 @@
>
> static DEFINE_MUTEX(rproc_list_mutex);
> static LIST_HEAD(rproc_list);
> +static struct notifier_block rproc_panic_nb;
>
> typedef int (*rproc_handle_resource_t)(struct rproc *rproc,
> void *, int offset, int avail);
> @@ -2219,10 +2221,51 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
> }
> EXPORT_SYMBOL(rproc_report_crash);
>
> +static int rproc_panic_handler(struct notifier_block *nb, unsigned long event,
> + void *ptr)
> +{
> + unsigned int longest = 0;
> + struct rproc *rproc;
> + unsigned int d;
> +
> + rcu_read_lock();
> + list_for_each_entry_rcu(rproc, &rproc_list, node) {
> + if (!rproc->ops->panic || rproc->state != RPROC_RUNNING)
> + continue;

To do things correctly, rproc->state would need to be protected by the
rproc->mutex, which would violate RCU's rule of not blocking inside a read-side
critical section. Going back to using the rproc_list_mutex, as in your
previous version, would likely trigger lockdep warnings quickly.

I don't have a solution; I'm just noting that a potential race does exist. On
the flip side, the consequences are minimal.

Reviewed-by: Mathieu Poirier <mathieu.poirier@xxxxxxxxxx>

> +
> + d = rproc->ops->panic(rproc);
> + longest = max(longest, d);
> + }
> + rcu_read_unlock();
> +
> + /*
> + * Delay for the longest requested duration before returning.
> + * This can be used by the remoteproc drivers to give the remote
> + * processor time to perform any requested operations (such as flush
> + * caches), where means for signalling the Linux side isn't available
> + * while in panic.
> + */
> + mdelay(longest);
> +
> + return NOTIFY_DONE;
> +}
> +
> +static void __init rproc_init_panic(void)
> +{
> + rproc_panic_nb.notifier_call = rproc_panic_handler;
> + atomic_notifier_chain_register(&panic_notifier_list, &rproc_panic_nb);
> +}
> +
> +static void __exit rproc_exit_panic(void)
> +{
> + atomic_notifier_chain_unregister(&panic_notifier_list, &rproc_panic_nb);
> +}
> +
> static int __init remoteproc_init(void)
> {
> rproc_init_sysfs();
> rproc_init_debugfs();
> + rproc_init_panic();
>
> return 0;
> }
> @@ -2232,6 +2275,7 @@ static void __exit remoteproc_exit(void)
> {
> ida_destroy(&rproc_dev_index);
>
> + rproc_exit_panic();
> rproc_exit_debugfs();
> rproc_exit_sysfs();
> }
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 16ad66683ad0..5959d6247dc0 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -369,6 +369,8 @@ enum rsc_handling_status {
> * expects to find it
> * @sanity_check: sanity check the fw image
> * @get_boot_addr: get boot address to entry point specified in firmware
> + * @panic: optional callback to react to system panic, core will delay
> + * panic at least the returned number of milliseconds
> */
> struct rproc_ops {
> int (*start)(struct rproc *rproc);
> @@ -383,6 +385,7 @@ struct rproc_ops {
> int (*load)(struct rproc *rproc, const struct firmware *fw);
> int (*sanity_check)(struct rproc *rproc, const struct firmware *fw);
> u32 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw);
> + unsigned long (*panic)(struct rproc *rproc);
> };
>
> /**
> --
> 2.24.0
>