Re: [PATCH] remoteproc: qcom: Add NOTIFY_FATAL event type to SSR subdevice

From: Mukesh Ojha
Date: Mon May 22 2023 - 05:33:32 EST




On 5/3/2023 4:56 PM, Mukesh Ojha wrote:


On 5/3/2023 11:51 AM, Vignesh Viswanathan wrote:
Currently the SSR subdevice notifies the client driver on crash of the
rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
However the client driver might be interested to know that the device
has crashed immediately to pause any further transactions with the
rproc. This calls for an event to be sent to the driver in the IRQ
context as soon as the rproc crashes.

Add NOTIFY_FATAL event to SSR subdevice to atomically notify rproc has
crashed to the client driver.

Validated the event in IPQ9574 and IPQ5332 by forcing the rproc to crash
and ensuring the registered notifier function receives the notification
in IRQ context.

This was one of valid use case we encounter in android, We have some other way of doing the same thing without core kernel change with
something called early notifiers.

https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/7583d24de337aa1bf7c375a7da706af9b995b9a1#a840754ebb0e24e88adbf48177e1abd0830b72d2

https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/257de41c63a5a51a081cc7887cdaa4a46e4d1744

But good to address this if possible.

Ack the idea of early notifier;
But here, atomic does not guarantees it to be atomic.

Acked-by: Mukesh Ojha <quic_mojha@xxxxxxxxxxx>

-- Mukesh



-Mukesh

Signed-off-by: Vignesh Viswanathan <quic_viswanat@xxxxxxxxxxx>
---
  drivers/remoteproc/qcom_common.c      | 60 +++++++++++++++++++++++++++
  drivers/remoteproc/remoteproc_core.c  | 12 ++++++
  include/linux/remoteproc.h            |  3 ++
  include/linux/remoteproc/qcom_rproc.h | 17 ++++++++
  4 files changed, 92 insertions(+)

diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
index a0d4238492e9..76542229aeb6 100644
--- a/drivers/remoteproc/qcom_common.c
+++ b/drivers/remoteproc/qcom_common.c
@@ -84,6 +84,7 @@ struct minidump_global_toc {
  struct qcom_ssr_subsystem {
      const char *name;
      struct srcu_notifier_head notifier_list;
+    struct atomic_notifier_head atomic_notifier_list;
      struct list_head list;
  };
@@ -366,6 +367,7 @@ static struct qcom_ssr_subsystem *qcom_ssr_get_subsys(const char *name)
      }
      info->name = kstrdup_const(name, GFP_KERNEL);
      srcu_init_notifier_head(&info->notifier_list);
+    ATOMIC_INIT_NOTIFIER_HEAD(&info->atomic_notifier_list);
      /* Add to global notification list */
      list_add_tail(&info->list, &qcom_ssr_subsystem_list);
@@ -417,6 +419,51 @@ int qcom_unregister_ssr_notifier(void *notify, struct notifier_block *nb)
  }
  EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
+/**
+ * qcom_register_ssr_atomic_notifier() - register SSR Atomic notification
+ *                     handler
+ * @name:    Subsystem's SSR name
+ * @nb:    notifier_block to be invoked upon subsystem's state change
+ *
+ * This registers the @nb notifier block as part the atomic notifier
+ * chain for a remoteproc associated with @name. The notifier block's callback
+ * will be invoked when the remote processor crashes in atomic context before
+ * the recovery process is queued.
+ *
+ * Return: a subsystem cookie on success, ERR_PTR on failure.
+ */
+void *qcom_register_ssr_atomic_notifier(const char *name,
+                    struct notifier_block *nb)
+{
+    struct qcom_ssr_subsystem *info;
+
+    info = qcom_ssr_get_subsys(name);
+    if (IS_ERR(info))
+        return info;
+
+    atomic_notifier_chain_register(&info->atomic_notifier_list, nb);
+
+    return &info->atomic_notifier_list;
+}
+EXPORT_SYMBOL_GPL(qcom_register_ssr_atomic_notifier);
+
+/**
+ * qcom_unregister_ssr_atomic_notifier() - unregister SSR Atomic notification
+ *                       handler
+ * @notify:    subsystem cookie returned from qcom_register_ssr_notifier
+ * @nb:        notifier_block to unregister
+ *
+ * This function will unregister the notifier from the atomic notifier
+ * chain.
+ *
+ * Return: 0 on success, %ENOENT otherwise.
+ */
+int qcom_unregister_ssr_atomic_notifier(void *notify, struct notifier_block *nb)
+{
+    return atomic_notifier_chain_unregister(notify, nb);
+}
+EXPORT_SYMBOL_GPL(qcom_unregister_ssr_atomic_notifier);
+
  static int ssr_notify_prepare(struct rproc_subdev *subdev)
  {
      struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
@@ -467,6 +514,18 @@ static void ssr_notify_unprepare(struct rproc_subdev *subdev)
                   QCOM_SSR_AFTER_SHUTDOWN, &data);
  }
+static void ssr_notify_crash(struct rproc_subdev *subdev)
+{
+    struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
+    struct qcom_ssr_notify_data data = {
+        .name = ssr->info->name,
+        .crashed = true,
+    };
+
+    atomic_notifier_call_chain(&ssr->info->atomic_notifier_list,
+                   QCOM_SSR_NOTIFY_CRASH, &data);
+}
+
  /**
   * qcom_add_ssr_subdev() - register subdevice as restart notification source
   * @rproc:    rproc handle
@@ -493,6 +552,7 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
      ssr->subdev.start = ssr_notify_start;
      ssr->subdev.stop = ssr_notify_stop;
      ssr->subdev.unprepare = ssr_notify_unprepare;
+    ssr->subdev.notify_crash = ssr_notify_crash;
      rproc_add_subdev(rproc, &ssr->subdev);
  }
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 695cce218e8c..3de0ece158ea 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1139,6 +1139,16 @@ static void rproc_unprepare_subdevices(struct rproc *rproc)
      }
  }
+static void rproc_notify_crash_subdevices(struct rproc *rproc)
+{
+    struct rproc_subdev *subdev;
+
+    list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
+        if (subdev->notify_crash)
+            subdev->notify_crash(subdev);
+    }
+}
+
  /**
   * rproc_alloc_registered_carveouts() - allocate all carveouts registered
   * in the list
@@ -2687,6 +2697,8 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
      dev_err(&rproc->dev, "crash detected in %s: type %s\n",
          rproc->name, rproc_crash_to_string(type));
+    rproc_notify_crash_subdevices(rproc);
+
      queue_work(rproc_recovery_wq, &rproc->crash_handler);
  }
  EXPORT_SYMBOL(rproc_report_crash);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index fe8978eb69f1..f3c0e0103e81 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -596,6 +596,8 @@ struct rproc {
   * @stop: stop function, called before the rproc is stopped; the @crashed
   *        parameter indicates if this originates from a recovery
   * @unprepare: unprepare function, called after the rproc has been stopped
+ * @notify_crash: notify_crash function, called in atomic context to notify
+ *          rproc has crashed and recovery is about to start
   */
  struct rproc_subdev {
      struct list_head node;
@@ -604,6 +606,7 @@ struct rproc_subdev {
      int (*start)(struct rproc_subdev *subdev);
      void (*stop)(struct rproc_subdev *subdev, bool crashed);
      void (*unprepare)(struct rproc_subdev *subdev);
+    void (*notify_crash)(struct rproc_subdev *subdev);
  };
  /* we currently support only two vrings per rvdev */
diff --git a/include/linux/remoteproc/qcom_rproc.h b/include/linux/remoteproc/qcom_rproc.h
index 82b211518136..f3d06900f297 100644
--- a/include/linux/remoteproc/qcom_rproc.h
+++ b/include/linux/remoteproc/qcom_rproc.h
@@ -11,12 +11,14 @@ struct notifier_block;
   * @QCOM_SSR_AFTER_POWERUP:    Remoteproc is running (start stage)
   * @QCOM_SSR_BEFORE_SHUTDOWN:    Remoteproc crashed or shutting down (stop stage)
   * @QCOM_SSR_AFTER_SHUTDOWN:    Remoteproc is down (unprepare stage)
+ * @QCOM_SSR_NOTIFY_CRASH:    Remoteproc crashed
   */
  enum qcom_ssr_notify_type {
      QCOM_SSR_BEFORE_POWERUP,
      QCOM_SSR_AFTER_POWERUP,
      QCOM_SSR_BEFORE_SHUTDOWN,
      QCOM_SSR_AFTER_SHUTDOWN,
+    QCOM_SSR_NOTIFY_CRASH,
  };
  struct qcom_ssr_notify_data {
@@ -29,6 +31,10 @@ struct qcom_ssr_notify_data {
  void *qcom_register_ssr_notifier(const char *name, struct notifier_block *nb);
  int qcom_unregister_ssr_notifier(void *notify, struct notifier_block *nb);
+void *qcom_register_ssr_atomic_notifier(const char *name,
+                    struct notifier_block *nb);
+int qcom_unregister_ssr_atomic_notifier(void *notify,
+                    struct notifier_block *nb);
  #else
  static inline void *qcom_register_ssr_notifier(const char *name,
@@ -43,6 +49,17 @@ static inline int qcom_unregister_ssr_notifier(void *notify,
      return 0;
  }
+static inline void *qcom_register_ssr_atomic_notifier(const char *name,
+                              struct notifier_block *nb)
+{
+    return 0;
+}
+
+static inline int qcom_unregister_ssr_atomic_notifier(void *notify,
+                              struct notifier_block *nb)
+{
+    return 0;
+}
  #endif
  #endif