Re: [PATCH] media: rc: pwm-ir-tx: use hrtimer for edge generation

From: Sean Young
Date: Fri Oct 13 2023 - 07:08:39 EST


On Thu, Oct 05, 2023 at 01:28:59PM +0300, Ivaylo Dimitrov wrote:
> usleep_range() seems to suffer from scheduling latency of up to 400 us on
> some platforms (like OMAP) which makes it unusable for IR pulses edge
> timings. In the same time pwm_ir_trx() is called in a context with priority
> which might not be suitable for real-time IR pulses generation.
>
> Fix that by using hrtimer and a thread with sched_set_fifo() priority. That
> way scheduling latency is compensated by the fact that PWM is controlled in
> the thread after a completion is signalled in hrtimer function - we have
> more or less the same latency for every edge. If completion comes earlier
> than needed, we do udelay() till the exact time for the next edge. That
> way pulse width generation is robust and precise and mostly independent of
> the system load.
>
> Tests on Nokia N900 showed that udelay() is called with up to 200 us in
> worst cases, usually times are less that 100 us.

Cc the RT folks, I would like some input on this. How bad is this for
linux-rt or non-linux-rt for latency? Is this a problem, and if so is there
an #ifdef we can use to avoid it in certain builds?

> Signed-off-by: Ivaylo Dimitrov <ivo.g.dimitrov.75@xxxxxxxxx>
> ---
> drivers/media/rc/pwm-ir-tx.c | 122 ++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 115 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
> index 7732054c..cb6ce73 100644
> --- a/drivers/media/rc/pwm-ir-tx.c
> +++ b/drivers/media/rc/pwm-ir-tx.c
> @@ -4,6 +4,7 @@
> */
>
> #include <linux/kernel.h>
> +#include <linux/kthread.h>
> #include <linux/module.h>
> #include <linux/pwm.h>
> #include <linux/delay.h>
> @@ -17,8 +18,16 @@
>
> struct pwm_ir {
> struct pwm_device *pwm;
> + struct hrtimer timer;
> + struct task_struct *tx_thread;
> + wait_queue_head_t tx_wq;
> + struct completion tx_done;
> + struct completion edge;
> unsigned int carrier;
> unsigned int duty_cycle;
> + unsigned int *txbuf;
> + unsigned int count;
> + unsigned int index;
> };
>
> static const struct of_device_id pwm_ir_of_match[] = {
> @@ -48,12 +57,41 @@ static int pwm_ir_set_carrier(struct rc_dev *dev, u32 carrier)
> return 0;
> }
>
> -static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> - unsigned int count)
> +static enum hrtimer_restart pwm_ir_timer_cb(struct hrtimer *timer)
> +{
> + struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
> + ktime_t now;
> +
> + /*
> + * If we happen to hit an odd latency spike, loop through the
> + * pulses until we catch up.
> + */
> + do {
> + u64 edge;
> +
> + complete(&pwm_ir->edge);
> +
> + if (pwm_ir->index >= pwm_ir->count)
> + return HRTIMER_NORESTART;
> +
> + edge = US_TO_NS(pwm_ir->txbuf[pwm_ir->index]);
> + hrtimer_add_expires_ns(timer, edge);
> +
> + pwm_ir->index++;
> +
> + now = timer->base->get_time();
> +
> + } while (hrtimer_get_expires_tv64(timer) < now);
> +
> + return HRTIMER_RESTART;
> +}
> +
> +static void _pwm_ir_tx(struct pwm_ir *pwm_ir)
> {
> - struct pwm_ir *pwm_ir = dev->priv;
> struct pwm_device *pwm = pwm_ir->pwm;
> struct pwm_state state;
> + unsigned int *txbuf = pwm_ir->txbuf;
> + unsigned int count = pwm_ir->count;
> int i;
> ktime_t edge;
> long delta;
> @@ -63,6 +101,8 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
> pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);
>
> + hrtimer_start(&pwm_ir->timer, 0, HRTIMER_MODE_REL);
> + wait_for_completion(&pwm_ir->edge);
> edge = ktime_get();
>
> for (i = 0; i < count; i++) {
> @@ -70,14 +110,50 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> pwm_apply_state(pwm, &state);
>
> edge = ktime_add_us(edge, txbuf[i]);
> + wait_for_completion(&pwm_ir->edge);
> +
> delta = ktime_us_delta(edge, ktime_get());
> +
> if (delta > 0)
> - usleep_range(delta, delta + 10);
> + udelay(delta);

So you're sleeping if the timer arrives early, but what about if it arrives
late? No amount of sleeping will help there.

> }
>
> state.enabled = false;
> pwm_apply_state(pwm, &state);
>
> + pwm_ir->count = 0;
> +}
> +
> +static int pwm_ir_thread(void *data)
> +{
> + struct pwm_ir *pwm_ir = data;
> +
> + for (;;) {
> + wait_event_idle(pwm_ir->tx_wq,
> + kthread_should_stop() || pwm_ir->count);
> +
> + if (kthread_should_stop())
> + break;
> +
> + _pwm_ir_tx(pwm_ir);
> + complete(&pwm_ir->tx_done);
> + }
> +
> + return 0;
> +}

Rather than creating a new thread, we could re-use the process that
is doing the TX, i.e. put this in pwm_ir_tx() itself. That should work,
right?

> +
> +static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> + unsigned int count)
> +{
> + struct pwm_ir *pwm_ir = dev->priv;
> +
> + pwm_ir->txbuf = txbuf;
> + pwm_ir->count = count;
> + pwm_ir->index = 0;
> +
> + wake_up(&pwm_ir->tx_wq);
> + wait_for_completion(&pwm_ir->tx_done);
> +
> return count;
> }
>
> @@ -91,12 +167,24 @@ static int pwm_ir_probe(struct platform_device *pdev)
> if (!pwm_ir)
> return -ENOMEM;
>
> + platform_set_drvdata(pdev, pwm_ir);
> +
> pwm_ir->pwm = devm_pwm_get(&pdev->dev, NULL);
> if (IS_ERR(pwm_ir->pwm))
> return PTR_ERR(pwm_ir->pwm);
>
> - pwm_ir->carrier = 38000;
> + /* Use default, in case userspace does not set the carrier */
> + pwm_ir->carrier = DIV_ROUND_CLOSEST_ULL(pwm_get_period(pwm_ir->pwm),
> + NSEC_PER_SEC);
> pwm_ir->duty_cycle = 50;
> + pwm_ir->count = 0;
> +
> + init_waitqueue_head(&pwm_ir->tx_wq);
> + init_completion(&pwm_ir->edge);
> + init_completion(&pwm_ir->tx_done);
> +
> + hrtimer_init(&pwm_ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + pwm_ir->timer.function = pwm_ir_timer_cb;
>
> rcdev = devm_rc_allocate_device(&pdev->dev, RC_DRIVER_IR_RAW_TX);
> if (!rcdev)
> @@ -110,14 +198,34 @@ static int pwm_ir_probe(struct platform_device *pdev)
> rcdev->s_tx_carrier = pwm_ir_set_carrier;
>
> rc = devm_rc_register_device(&pdev->dev, rcdev);
> - if (rc < 0)
> + if (rc < 0) {
> dev_err(&pdev->dev, "failed to register rc device\n");
> + return rc;
> + }
> +
> + pwm_ir->tx_thread = kthread_create(pwm_ir_thread, pwm_ir, "%s/tx",
> + dev_name(&pdev->dev));
> + if (IS_ERR(pwm_ir->tx_thread))
> + return PTR_ERR(pwm_ir->tx_thread);
>
> - return rc;
> + sched_set_fifo(pwm_ir->tx_thread);
> + wake_up_process(pwm_ir->tx_thread);
> +
> + return 0;
> +}
> +
> +static int pwm_ir_remove(struct platform_device *pdev)
> +{
> + struct pwm_ir *pwm_ir = platform_get_drvdata(pdev);
> +
> + kthread_stop(pwm_ir->tx_thread);
> +
> + return 0;
> }
>
> static struct platform_driver pwm_ir_driver = {
> .probe = pwm_ir_probe,
> + .remove = pwm_ir_remove,
> .driver = {
> .name = DRIVER_NAME,
> .of_match_table = pwm_ir_of_match,
> --
> 1.9.1