Re: [PATCH 2/3] venus: Rework recovery mechanism

From: Fritz Koenig
Date: Fri Aug 07 2020 - 17:29:31 EST


On Thu, Jul 30, 2020 at 4:47 AM Stanimir Varbanov
<stanimir.varbanov@xxxxxxxxxx> wrote:
>
> After power domains and clock restructuring the recovery for
> sdm845 and v4 did not work properly. Fix that by reworking the
> recovery function and the sequence.
>
> Signed-off-by: Stanimir Varbanov <stanimir.varbanov@xxxxxxxxxx>
> ---
> drivers/media/platform/qcom/venus/core.c | 24 ++++++++++---------
> drivers/media/platform/qcom/venus/hfi_venus.c | 11 ---------
> 2 files changed, 13 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
> index 203c6538044f..46f6e34d435a 100644
> --- a/drivers/media/platform/qcom/venus/core.c
> +++ b/drivers/media/platform/qcom/venus/core.c
> @@ -6,6 +6,7 @@
> #include <linux/init.h>
> #include <linux/interconnect.h>
> #include <linux/ioctl.h>
> +#include <linux/delay.h>
> #include <linux/list.h>
> #include <linux/module.h>
> #include <linux/of_device.h>
> @@ -40,13 +41,7 @@ static void venus_event_notify(struct venus_core *core, u32 event)
> mutex_unlock(&core->lock);
>
> disable_irq_nosync(core->irq);
> -
> - /*
> - * Delay recovery to ensure venus has completed any pending cache
> - * operations. Without this sleep, we see device reset when firmware is
> - * unloaded after a system error.
> - */
> - schedule_delayed_work(&core->work, msecs_to_jiffies(100));
> + schedule_delayed_work(&core->work, msecs_to_jiffies(10));
> }
>
> static const struct hfi_core_ops venus_core_ops = {
> @@ -59,23 +54,30 @@ static void venus_sys_error_handler(struct work_struct *work)
> container_of(work, struct venus_core, work.work);
> int ret = 0;
>
> - dev_warn(core->dev, "system error has occurred, starting recovery!\n");
> -
> pm_runtime_get_sync(core->dev);
>
> hfi_core_deinit(core, true);
> - hfi_destroy(core);
> +
> + dev_warn(core->dev, "system error has occurred, starting recovery!\n");
> +
> mutex_lock(&core->lock);
> +
> + while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc))
> + msleep(10);
> +
> venus_shutdown(core);
>
> pm_runtime_put_sync(core->dev);
>
> + while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0]))
> + usleep_range(1000, 1500);
> +
> + hfi_destroy(core);
> ret |= hfi_create(core, &venus_core_ops);
>
> pm_runtime_get_sync(core->dev);
>
> ret |= venus_boot(core);
> -
> ret |= hfi_core_resume(core, true);
>
> enable_irq(core->irq);
> diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c
> index 0d8855014ab3..3392fd177d22 100644
> --- a/drivers/media/platform/qcom/venus/hfi_venus.c
> +++ b/drivers/media/platform/qcom/venus/hfi_venus.c
> @@ -986,13 +986,6 @@ static void venus_process_msg_sys_error(struct venus_hfi_device *hdev,
>
> venus_set_state(hdev, VENUS_STATE_DEINIT);
>
> - /*
> - * Once SYS_ERROR received from HW, it is safe to halt the AXI.
> - * With SYS_ERROR, Venus FW may have crashed and HW might be
> - * active and causing unnecessary transactions. Hence it is
> - * safe to stop all AXI transactions from venus subsystem.
> - */
> - venus_halt_axi(hdev);
> venus_sfr_print(hdev);
> }
>
> @@ -1009,10 +1002,6 @@ static irqreturn_t venus_isr_thread(struct venus_core *core)
> res = hdev->core->res;
> pkt = hdev->pkt_buf;
>
> - if (hdev->irq_status & WRAPPER_INTR_STATUS_A2HWD_MASK) {
> - venus_sfr_print(hdev);
> - hfi_process_watchdog_timeout(core);
> - }
>
> while (!venus_iface_msgq_read(hdev, pkt)) {
> msg_ret = hfi_process_msg_packet(core, pkt);
> --
> 2.17.1
>
Reviewed-by: Fritz Koenig <frkoenig@xxxxxxxxxxxx>