Re: + kthread-fix-kthread_mod_delayed_work-vs-kthread_cancel_delayed_work_sync-race.patch added to -mm tree
From: Oleg Nesterov
Date: Fri May 21 2021 - 12:35:43 EST
On 05/20, Andrew Morton wrote:
>
> --- a/kernel/kthread.c~kthread-fix-kthread_mod_delayed_work-vs-kthread_cancel_delayed_work_sync-race
> +++ a/kernel/kthread.c
> @@ -1181,6 +1181,19 @@ bool kthread_mod_delayed_work(struct kth
> goto out;
>
> ret = __kthread_cancel_work(work, true, &flags);
> +
> + /*
> + * Canceling could run in parallel from kthread_cancel_delayed_work_sync
> + * and change work's canceling count as the spinlock is released and regain
> + * in __kthread_cancel_work so we need to check the count again. Otherwise,
> + * we might incorrectly queue the dwork and further cause
> + * cancel_delayed_work_sync thread waiting for flush dwork endlessly.
> + */
> + if (work->canceling) {
> + ret = false;
> + goto out;
> + }
> +
> fast_queue:
> __kthread_queue_delayed_work(worker, dwork, delay);
Never looked at this code before, can't review...
but note that another caller of __kthread_queue_delayed_work() needs to
check work->canceling too. So perhaps we should simply add queuing_blocked()
into __kthread_queue_delayed_work() ?
Something like below, uncompiled/untested, most probably incorrect.
Either way, this comment
* Return: %true if @dwork was pending and its timer was modified,
* %false otherwise.
above kthread_mod_delayed_work looks obviously wrong. Currently it returns
true if this work was pending. With your patch it returns true if it was
pending and not canceling.
With the patch below it returns true if the work was (re)queued successfully,
and this makes more sense to me. But again, I can easily misread this code.
In any case, even if my patch is correct, I won't insist, your fix is
much simpler.
Oleg.
--- x/kernel/kthread.c
+++ x/kernel/kthread.c
@@ -977,7 +977,7 @@ void kthread_delayed_work_timer_fn(struc
}
EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
-static void __kthread_queue_delayed_work(struct kthread_worker *worker,
+static bool __kthread_queue_delayed_work(struct kthread_worker *worker,
struct kthread_delayed_work *dwork,
unsigned long delay)
{
@@ -987,6 +987,9 @@ static void __kthread_queue_delayed_work
WARN_ON_FUNCTION_MISMATCH(timer->function,
kthread_delayed_work_timer_fn);
+ if (queuing_blocked(worker, work))
+ return false;
+
/*
* If @delay is 0, queue @dwork->work immediately. This is for
* both optimization and correctness. The earliest @timer can
@@ -995,7 +998,7 @@ static void __kthread_queue_delayed_work
*/
if (!delay) {
kthread_insert_work(worker, work, &worker->work_list);
- return;
+ return true;
}
/* Be paranoid and try to detect possible races already now. */
@@ -1005,6 +1008,7 @@ static void __kthread_queue_delayed_work
work->worker = worker;
timer->expires = jiffies + delay;
add_timer(timer);
+ return true;
}
/**
@@ -1028,16 +1032,12 @@ bool kthread_queue_delayed_work(struct k
{
struct kthread_work *work = &dwork->work;
unsigned long flags;
- bool ret = false;
+ bool ret;
raw_spin_lock_irqsave(&worker->lock, flags);
-
- if (!queuing_blocked(worker, work)) {
- __kthread_queue_delayed_work(worker, dwork, delay);
- ret = true;
- }
-
+ ret = __kthread_queue_delayed_work(worker, dwork, delay);
raw_spin_unlock_irqrestore(&worker->lock, flags);
+
return ret;
}
EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
@@ -1180,9 +1180,9 @@ bool kthread_mod_delayed_work(struct kth
if (work->canceling)
goto out;
- ret = __kthread_cancel_work(work, true, &flags);
+ __kthread_cancel_work(work, true, &flags);
fast_queue:
- __kthread_queue_delayed_work(worker, dwork, delay);
+ ret = __kthread_queue_delayed_work(worker, dwork, delay);
out:
raw_spin_unlock_irqrestore(&worker->lock, flags);
return ret;