Re: [PATCH 5/5] rcu: Remove kfree_call_rcu_nobatch()

From: Paul E. McKenney
Date: Wed Aug 28 2019 - 17:57:11 EST


On Tue, Aug 27, 2019 at 03:01:59PM -0400, Joel Fernandes (Google) wrote:
> Now that kfree_rcu() special casing have been removed from tree RCU,
> remove kfree_call_rcu_nobatch() since it is not needed.
>
> Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>

Now -this- one qualifies as a nice negative delta! ;-)

A few things below, please fix in next version.

Thanx, Paul

> ---
> .../admin-guide/kernel-parameters.txt | 4 ---
> include/linux/rcutiny.h | 5 ---
> include/linux/rcutree.h | 1 -
> kernel/rcu/rcuperf.c | 10 +-----
> kernel/rcu/tree.c | 33 ++++++++-----------
> 5 files changed, 14 insertions(+), 39 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 24fe8aefb12c..56be0e30100b 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -3909,10 +3909,6 @@
> Number of loops doing rcuperf.kfree_alloc_num number
> of allocations and frees.
>
> - rcuperf.kfree_no_batch= [KNL]
> - Use the non-batching (less efficient) version of kfree_rcu().
> - This is useful for comparing with the batched version.
> -
> rcuperf.nreaders= [KNL]
> Set number of RCU readers. The value -1 selects
> N, where N is the number of CPUs. A value
> diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> index 949841f52ec5..7aa93afa5d8d 100644
> --- a/include/linux/rcutiny.h
> +++ b/include/linux/rcutiny.h
> @@ -39,11 +39,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> call_rcu(head, func);
> }
>
> -static inline void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> -{
> - call_rcu(head, func);
> -}
> -
> void rcu_qs(void);
>
> static inline void rcu_softirq_qs(void)
> diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
> index 961b7e05d141..0b68aa952f8b 100644
> --- a/include/linux/rcutree.h
> +++ b/include/linux/rcutree.h
> @@ -34,7 +34,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
>
> void synchronize_rcu_expedited(void);
> void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
> -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func);
>
> void rcu_barrier(void);
> bool rcu_eqs_special_set(int cpu);
> diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
> index c1e25fd10f2a..da94b89cd531 100644
> --- a/kernel/rcu/rcuperf.c
> +++ b/kernel/rcu/rcuperf.c
> @@ -593,7 +593,6 @@ rcu_perf_shutdown(void *arg)
> torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu().");
> torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration.");
> torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees.");
> -torture_param(int, kfree_no_batch, 0, "Use the non-batching (slower) version of kfree_rcu().");
>
> static struct task_struct **kfree_reader_tasks;
> static int kfree_nrealthreads;
> @@ -632,14 +631,7 @@ kfree_perf_thread(void *arg)
> if (!alloc_ptr)
> return -ENOMEM;
>
> - if (!kfree_no_batch) {
> - kfree_rcu(alloc_ptr, rh);
> - } else {
> - rcu_callback_t cb;
> -
> - cb = (rcu_callback_t)(unsigned long)offsetof(struct kfree_obj, rh);
> - kfree_call_rcu_nobatch(&(alloc_ptr->rh), cb);
> - }
> + kfree_rcu(alloc_ptr, rh);
> }
>
> cond_resched();
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 12c17e10f2b4..c767973d62ac 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -2777,8 +2777,10 @@ static void kfree_rcu_work(struct work_struct *work)
> rcu_lock_acquire(&rcu_callback_map);
> trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
>
> - /* Could be possible to optimize with kfree_bulk in future */
> - kfree((void *)head - offset);
> + if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
> + /* Could be optimized with kfree_bulk() in future. */
> + kfree((void *)head - offset);
> + }

This really needs to be in the previous patch until such time as Tiny RCU
no longer needs the restriction.

> rcu_lock_release(&rcu_callback_map);
> cond_resched_tasks_rcu_qs();
> @@ -2856,16 +2858,6 @@ static void kfree_rcu_monitor(struct work_struct *work)
> spin_unlock_irqrestore(&krcp->lock, flags);
> }
>
> -/*
> - * This version of kfree_call_rcu does not do batching of kfree_rcu() requests.
> - * Used only by rcuperf torture test for comparison with kfree_rcu_batch().
> - */
> -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> -{
> - __call_rcu(head, func);
> -}
> -EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
> -
> /*
> * Queue a request for lazy invocation of kfree() after a grace period.
> *
> @@ -2885,12 +2877,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> unsigned long flags;
> struct kfree_rcu_cpu *krcp;
>
> - /* kfree_call_rcu() batching requires timers to be up. If the scheduler
> - * is not yet up, just skip batching and do the non-batched version.
> - */
> - if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
> - return kfree_call_rcu_nobatch(head, func);
> -
> if (debug_rcu_head_queue(head)) {
> /* Probable double kfree_rcu() */
> WARN_ONCE(1, "kfree_call_rcu(): Double-freed call. rcu_head %p\n",
> @@ -2909,8 +2895,15 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> krcp->head = head;
>
> /* Schedule monitor for timely drain after KFREE_DRAIN_JIFFIES. */
> - if (!xchg(&krcp->monitor_todo, true))
> - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> + if (!xchg(&krcp->monitor_todo, true)) {
> + /* Scheduling the monitor requires scheduler/timers to be up,
> + * if it is not, just skip it. An eventual kfree_rcu() will
> + * kick it again.
> + */
> + if ((rcu_scheduler_active == RCU_SCHEDULER_RUNNING)) {
> + schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> + }
> + }

And this also needs to be in an earlier patch. Bisectability and all that!

Are we really guaranteed that there will be an eventual kfree_rcu()?
More of a worry for Tiny RCU than for Tree RCU, but still could be
annoying for someone trying to debug a memory leak.

Thanx, Paul

> spin_unlock(&krcp->lock);
> local_irq_restore(flags);
> --
> 2.23.0.187.g17f5b7556c-goog
>