[PATCH rcu 3/3] rcu/kvfree: Update KFREE_DRAIN_JIFFIES interval

From: Paul E. McKenney
Date: Wed Aug 31 2022 - 14:11:24 EST


From: "Uladzislau Rezki (Sony)" <urezki@xxxxxxxxx>

Currently the monitor work is scheduled with a fixed interval of HZ/20,
which is roughly 50 milliseconds. The drawback of this approach is
low utilization of the 512 page slots in scenarios with infrequence
kvfree_rcu() calls. For example on an Android system:

<snip>
kworker/3:3-507 [003] .... 470.286305: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000d0f0dde5 nr_records=6
kworker/6:1-76 [006] .... 470.416613: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000ea0d6556 nr_records=1
kworker/6:1-76 [006] .... 470.416625: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000003e025849 nr_records=9
kworker/3:3-507 [003] .... 471.390000: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000815a8713 nr_records=48
kworker/1:1-73 [001] .... 471.725785: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000fda9bf20 nr_records=3
kworker/1:1-73 [001] .... 471.725833: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000a425b67b nr_records=76
kworker/0:4-1411 [000] .... 472.085673: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000007996be9d nr_records=1
kworker/0:4-1411 [000] .... 472.085728: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000d0f0dde5 nr_records=5
kworker/6:1-76 [006] .... 472.260340: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000065630ee4 nr_records=102
<snip>

In many cases, out of 512 slots, fewer than 10 were actually used.
In order to improve batching and make utilization more efficient this
commit sets a drain interval to a fixed 5-seconds interval. Floods are
detected when a page fills quickly, and in that case, the reclaim work
is re-scheduled for the next scheduling-clock tick (jiffy).

After this change:

<snip>
kworker/7:1-371 [007] .... 5630.725708: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000005ab0ffb3 nr_records=121
kworker/7:1-371 [007] .... 5630.989702: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000060c84761 nr_records=47
kworker/7:1-371 [007] .... 5630.989714: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000000babf308 nr_records=510
kworker/7:1-371 [007] .... 5631.553790: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000bb7bd0ef nr_records=169
kworker/7:1-371 [007] .... 5631.553808: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000044c78753 nr_records=510
kworker/5:6-9428 [005] .... 5631.746102: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000d98519aa nr_records=123
kworker/4:7-9434 [004] .... 5632.001758: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000526c9d44 nr_records=322
kworker/4:7-9434 [004] .... 5632.002073: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000002c6a8afa nr_records=185
kworker/7:1-371 [007] .... 5632.277515: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000007f4a962f nr_records=510
<snip>

Here, all but one of the cases, more than one hundreds slots were used,
representing an order-of-magnitude improvement.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@xxxxxxxxx>
Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxx>
---
kernel/rcu/tree.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3d234d536d4c3..7b90478b752e8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2832,7 +2832,7 @@ EXPORT_SYMBOL_GPL(call_rcu);


/* Maximum number of jiffies to wait before draining a batch. */
-#define KFREE_DRAIN_JIFFIES (HZ / 50)
+#define KFREE_DRAIN_JIFFIES (5 * HZ)
#define KFREE_N_BATCHES 2
#define FREE_N_CHANNELS 2

@@ -3093,6 +3093,21 @@ need_offload_krc(struct kfree_rcu_cpu *krcp)
return !!krcp->head;
}

+static void
+schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
+{
+ long delay, delay_left;
+
+ delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES;
+ if (delayed_work_pending(&krcp->monitor_work)) {
+ delay_left = krcp->monitor_work.timer.expires - jiffies;
+ if (delay < delay_left)
+ mod_delayed_work(system_wq, &krcp->monitor_work, delay);
+ return;
+ }
+ queue_delayed_work(system_wq, &krcp->monitor_work, delay);
+}
+
/*
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
*/
@@ -3150,7 +3165,7 @@ static void kfree_rcu_monitor(struct work_struct *work)
// work to repeat an attempt. Because previous batches are
// still in progress.
if (need_offload_krc(krcp))
- schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+ schedule_delayed_monitor_work(krcp);

raw_spin_unlock_irqrestore(&krcp->lock, flags);
}
@@ -3339,7 +3354,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)

// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
- schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+ schedule_delayed_monitor_work(krcp);

unlock_return:
krc_this_cpu_unlock(krcp, flags);
@@ -3415,7 +3430,7 @@ void __init kfree_rcu_scheduler_running(void)

raw_spin_lock_irqsave(&krcp->lock, flags);
if (need_offload_krc(krcp))
- schedule_delayed_work_on(cpu, &krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+ schedule_delayed_monitor_work(krcp);
raw_spin_unlock_irqrestore(&krcp->lock, flags);
}
}
--
2.31.1.189.g2e36527f23