Re: [PATCH 1/2] mailbox: mtk-cmdq: Change GCE hardware timeout to software timeout
From: Jason-JH Lin (林睿祥)
Date: Fri Jan 12 2024 - 01:53:01 EST
On Thu, 2024-01-11 at 01:50 +0000, CK Hu (胡俊光) wrote:
> Hi, Jason:
>
> On Wed, 2024-01-10 at 23:51 +0800, Jason-JH.Lin wrote:
> > GCE axi_clock 156MHz, 1 tick cycle = 6.41ns.
> >
> > The register CMDQ_INSTN_TIMEOUT_CYCLES is a GCE hardware
> > configuration
> > for instruction timeout cycles. It's the cycles to issue
> > instruction
> > timeout interrupt for wait and poll instructions.
> >
> > This timeout setting is coarse-grain and has 100% uncertainty,
> > which means that if it is set to 16 cycles, the timeout will be
> > reduced
> > from 16 * 2 = 32 cycles to 16 cycles.
> > If it is set to 64 cycles, the timeout will be reduced from 64 * 2
> > =
> > 128
> > cycles to 64 cycles.
> >
> > Current CMDQ_INSTN_TIMEOUT_CYCLES is set to 22, it means
> > instruction
> > timeout is reduced from 2^22 * 2 * 6.41ns = 53.8ms to 26.9ms.
> >
> > Since the max value of CMDQ_INSTN_TIMEOUT_CYCLES is 27, it means
> > the
> > max
> > instruction timeout is reduced from 2^27 * 2 * 6.41ns = 1720ms to
> > 860ms.
> >
> > It's not enough for the use case of ISP driver below:
> > GCE Thread A: wait for SOF and set event 1.
> > GCE Thread B: wait for event 1 and set event 2.
> > GCE Thread C: wait for event 2 and set event 3.
> > GCE Thread D: wait for event 3 and set event 4.
> > GCE Thread E: wait for event 4 and set EOF.
> > If all GCE Threads start at the same time, the latest GCE Thread E
> > will
> > wait for event more than 2 seconds.
>
> So wasting design. I could use one GCE thread to do this because they
> does jobs sequentially.
>
This design is for the ISP feature called Motion Compensation Noise
Reduction (MCNR), which involves multiple HW engines. Each HW engines
will request one GCE thread to set their configurations for processing
a part of one frame.
In order to reduce SW overhead as much as possible, it is hoped that
each time the ISP HW processes part of the frame data, SW can prepare
the frame data that the HW will continue to use.
In addition, MCNR has data dependencies, so it needs to be scheduled by
GCE events to ensure that when GCE starts HW, the corresponding data
dependencies are ready.
That's why one frame is divided into multiple inputs, set to the HW
modules, and executed in different GCE threads.
> About the timeout, I would like the client driver to process the
> timeout. For example, if one client driver has send 2 packet, the
> first
> one should run less than 1 ms, the second one should run less 500 ms,
> different packet has different timeout value, only the client driver
> could process different timeout value for each packet. I think
> different client driver would have different timeout process method.
> The drm driver use vblank count to decide timeout. So the timeout
> would
> vary by client driver. Therefore, it's better that client driver to
> process the timeout. In this case, let ISP driver to setup timer to
> detect timeout and gce driver is not necessary to do any
> modification.
>
OK, I'll remove the CMDQ HW timeout and let other client drivers
implement their own SW timeout.
Regards,
Jason-JH.Lin
> Regards,
> CK
>
> >
> > Therefore, we changed the hardware timeout to software timeout,
> > making it
> > longer, more certain, and making it configurable by CMDQ client
> > drivers.
> >
> > Signed-off-by: Jason-JH.Lin <jason-jh.lin@xxxxxxxxxxxx>
> > ---
> > drivers/mailbox/mtk-cmdq-mailbox.c | 172
> > +++++++++++++++++++++++
> > include/linux/mailbox/mtk-cmdq-mailbox.h | 3 +
> > 2 files changed, 175 insertions(+)
> >
> > diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c
> > b/drivers/mailbox/mtk-cmdq-mailbox.c
> > index de862e9137d5..89567f837513 100644
> > --- a/drivers/mailbox/mtk-cmdq-mailbox.c
> > +++ b/drivers/mailbox/mtk-cmdq-mailbox.c
> > @@ -12,6 +12,9 @@
> > #include <linux/iopoll.h>
> > #include <linux/kernel.h>
> > #include <linux/module.h>
> > +#include <linux/timer.h>
> > +#include <linux/workqueue.h>
> > +#include <linux/sched/clock.h>
> > #include <linux/platform_device.h>
> > #include <linux/pm_runtime.h>
> > #include <linux/mailbox_controller.h>
> > @@ -64,6 +67,11 @@ struct cmdq_thread {
> > void __iomem *base;
> > struct list_head task_busy_list;
> > u32 priority;
> > + u32 idx;
> > + struct timer_list timeout;
> > + u32 timeout_ms;
> > + struct work_struct timeout_work;
> > + u64 timer_mod;
> > };
> >
> > struct cmdq_task {
> > @@ -83,6 +91,7 @@ struct cmdq {
> > struct cmdq_thread *thread;
> > struct clk_bulk_data clocks[CMDQ_GCE_NUM_MAX];
> > bool suspended;
> > + struct workqueue_struct *timeout_wq;
> > };
> >
> > struct gce_plat {
> > @@ -288,6 +297,158 @@ static void cmdq_thread_irq_handler(struct
> > cmdq
> > *cmdq,
> >
> > if (list_empty(&thread->task_busy_list))
> > cmdq_thread_disable(cmdq, thread);
> > +
> > + if (!task) {
> > + cmdq_thread_disable(cmdq, thread);
> > + pr_debug("empty task thread:%u", thread->idx);
> > + } else {
> > + mod_timer(&thread->timeout, jiffies +
> > + msecs_to_jiffies(thread->timeout_ms));
> > + thread->timer_mod = sched_clock();
> > + pr_debug("mod_timer pkt:0x%p timeout:%u thread:%u",
> > + task->pkt, thread->timeout_ms, thread->idx);
> > + }
> > +}
> > +
> > +static bool cmdq_thread_timeout_exceed(struct cmdq_thread *thread)
> > +{
> > + u64 duration;
> > +
> > + /*
> > + * If the first execution time stamp is smaller than timeout
> > value,
> > + * it is the last round of timeout. Skip it.
> > + */
> > + duration = div_s64(sched_clock() - thread->timer_mod, 1000000);
> > + if (duration < thread->timeout_ms) {
> > + mod_timer(&thread->timeout, jiffies +
> > + msecs_to_jiffies(thread->timeout_ms -
> > duration));
> > + thread->timer_mod = sched_clock();
> > + pr_debug("thread:%u mod time:%llu dur:%llu timeout not
> > exceed",
> > + thread->idx, thread->timer_mod, duration);
> > + return false;
> > + }
> > +
> > + return true;
> > +}
> > +
> > +static void cmdq_thread_handle_timeout_work(struct work_struct
> > *work_item)
> > +{
> > + struct cmdq_thread *thread = container_of(work_item,
> > + struct cmdq_thread, timeout_work);
> > + struct cmdq *cmdq = container_of(thread->chan->mbox, struct
> > cmdq, mbox);
> > + struct cmdq_task *task, *tmp, *timeout_task = NULL;
> > + unsigned long flags;
> > + dma_addr_t pa_curr;
> > + struct list_head removes;
> > +
> > + INIT_LIST_HEAD(&removes);
> > +
> > + spin_lock_irqsave(&thread->chan->lock, flags);
> > +
> > + if (list_empty(&thread->task_busy_list)) {
> > + spin_unlock_irqrestore(&thread->chan->lock, flags);
> > + return;
> > + }
> > +
> > + /* Check before suspending thread to prevent performance
> > penalty. */
> > + if (!cmdq_thread_timeout_exceed(thread)) {
> > + spin_unlock_irqrestore(&thread->chan->lock, flags);
> > + return;
> > + }
> > +
> > + WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0);
> > +
> > + /*
> > + * Although IRQ is disabled, GCE continues to execute.
> > + * It may have pending IRQ before GCE thread is suspended,
> > + * so check this condition again.
> > + */
> > + cmdq_thread_irq_handler(cmdq, thread);
> > +
> > + if (list_empty(&thread->task_busy_list)) {
> > + pr_err("thread:%u empty after irq handle in timeout",
> > thread->idx);
> > + goto unlock_free_done;
> > + }
> > +
> > + /* After IRQ, the first task may change. */
> > + if (!cmdq_thread_timeout_exceed(thread)) {
> > + cmdq_thread_resume(thread);
> > + goto unlock_free_done;
> > + }
> > +
> > + pr_err("timeout for thread:0x%p idx:%u", thread->base, thread-
> > > idx);
> >
> > +
> > + pa_curr = readl(thread->base + CMDQ_THR_CURR_ADDR) << cmdq-
> > > pdata->shift;
> >
> > + list_for_each_entry_safe(task, tmp, &thread->task_busy_list,
> > + list_entry) {
> > + u32 task_end_pa = task->pa_base + task->pkt-
> > > cmd_buf_size;
> >
> > +
> > + if (pa_curr >= task->pa_base && pa_curr < task_end_pa)
> > {
> > + timeout_task = task;
> > + break;
> > + }
> > +
> > + pr_info("ending not curr in timeout pkt:0x%p
> > curr_pa:%pa", task->pkt, &pa_curr);
> > + cmdq_task_exec_done(task, 0);
> > + kfree(task);
> > + }
> > +
> > + if (timeout_task) {
> > + spin_unlock_irqrestore(&thread->chan->lock, flags);
> > +
> > + cmdq_task_exec_done(timeout_task, -ETIMEDOUT);
> > +
> > + spin_lock_irqsave(&thread->chan->lock, flags);
> > +
> > + task = list_first_entry_or_null(&thread-
> > > task_busy_list,
> >
> > + struct cmdq_task,
> > list_entry);
> > + if (timeout_task == task) {
> > + cmdq_task_exec_done(task, -ETIMEDOUT);
> > + kfree(task);
> > + } else {
> > + pr_err("task list changed");
> > + }
> > + }
> > +
> > + task = list_first_entry_or_null(&thread->task_busy_list,
> > + struct cmdq_task, list_entry);
> > + if (task) {
> > + mod_timer(&thread->timeout, jiffies +
> > + msecs_to_jiffies(thread->timeout_ms));
> > + thread->timer_mod = sched_clock();
> > + cmdq_thread_reset(cmdq, thread);
> > + cmdq_thread_resume(thread);
> > + } else {
> > + cmdq_thread_resume(thread);
> > + cmdq_thread_disable(cmdq, thread);
> > + pm_runtime_mark_last_busy(cmdq->mbox.dev);
> > + }
> > +
> > +unlock_free_done:
> > + spin_unlock_irqrestore(&thread->chan->lock, flags);
> > +
> > + list_for_each_entry_safe(task, tmp, &removes, list_entry) {
> > + list_del(&task->list_entry);
> > + kfree(task);
> > + }
> > +}
> > +
> > +static void cmdq_thread_handle_timeout(struct timer_list *t)
> > +{
> > + struct cmdq_thread *thread = from_timer(thread, t, timeout);
> > + struct cmdq *cmdq = container_of(thread->chan->mbox, struct
> > cmdq, mbox);
> > + unsigned long flags;
> > + bool empty;
> > +
> > + spin_lock_irqsave(&thread->chan->lock, flags);
> > + empty = list_empty(&thread->task_busy_list);
> > + spin_unlock_irqrestore(&thread->chan->lock, flags);
> > +
> > + if (empty || work_pending(&thread->timeout_work))
> > + return;
> > +
> > + pr_debug("queue cmdq timeout thread:%u", thread->idx);
> > + queue_work(cmdq->timeout_wq, &thread->timeout_work);
> > }
> >
> > static irqreturn_t cmdq_irq_handler(int irq, void *dev)
> > @@ -426,6 +587,11 @@ static int cmdq_mbox_send_data(struct
> > mbox_chan
> > *chan, void *data)
> > writel(thread->priority, thread->base +
> > CMDQ_THR_PRIORITY);
> > writel(CMDQ_THR_IRQ_EN, thread->base +
> > CMDQ_THR_IRQ_ENABLE);
> > writel(CMDQ_THR_ENABLED, thread->base +
> > CMDQ_THR_ENABLE_TASK);
> > + if (thread->timeout_ms != CMDQ_NO_TIMEOUT) {
> > + mod_timer(&thread->timeout, jiffies +
> > + msecs_to_jiffies(thread-
> > > timeout_ms));
> >
> > + thread->timer_mod = sched_clock();
> > + }
> > } else {
> > WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0);
> > curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) <<
> > @@ -657,10 +823,14 @@ static int cmdq_probe(struct platform_device
> > *pdev)
> > return -ENOMEM;
> >
> > for (i = 0; i < cmdq->pdata->thread_nr; i++) {
> > + cmdq->thread[i].idx = i;
> > cmdq->thread[i].base = cmdq->base + CMDQ_THR_BASE +
> > CMDQ_THR_SIZE * i;
> > + cmdq->thread[i].timeout_ms = CMDQ_TIMEOUT_DEFAULT;
> > INIT_LIST_HEAD(&cmdq->thread[i].task_busy_list);
> > cmdq->mbox.chans[i].con_priv = (void *)&cmdq-
> > > thread[i];
> >
> > + timer_setup(&cmdq->thread[i].timeout,
> > cmdq_thread_handle_timeout, 0);
> > + INIT_WORK(&cmdq->thread[i].timeout_work,
> > cmdq_thread_handle_timeout_work);
> > }
> >
> > err = devm_mbox_controller_register(dev, &cmdq->mbox);
> > @@ -669,6 +839,8 @@ static int cmdq_probe(struct platform_device
> > *pdev)
> > return err;
> > }
> >
> > + cmdq->timeout_wq =
> > create_singlethread_workqueue("cmdq_timeout_handler");
> > +
> > platform_set_drvdata(pdev, cmdq);
> >
> > WARN_ON(clk_bulk_prepare(cmdq->pdata->gce_num, cmdq->clocks));
> > diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h
> > b/include/linux/mailbox/mtk-cmdq-mailbox.h
> > index a8f0070c7aa9..4973b2ec37db 100644
> > --- a/include/linux/mailbox/mtk-cmdq-mailbox.h
> > +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
> > @@ -21,6 +21,9 @@
> > #define CMDQ_WFE_WAIT BIT(15)
> > #define CMDQ_WFE_WAIT_VALUE 0x1
> >
> > +#define CMDQ_TIMEOUT_DEFAULT 1000
> > +#define CMDQ_NO_TIMEOUT 0xffffffff
> > +
> > /*
> > * WFE arg_b
> > * bit 0-11: wait value