[PATCH 2/2] rcu/kvfree: Introduce KFREE_DRAIN_JIFFIES_[MAX/MIN] interval

From: Uladzislau Rezki (Sony)
Date: Thu Jun 02 2022 - 04:07:03 EST


Currently the monitor work is scheduled at a fixed interval of HZ/50
jiffies, that is, every 20 milliseconds. The drawback of such an
approach is low utilization of page slots in some scenarios. A page
can store up to 512 records. For example, on an Android system it
can look like this:

<snip>
kworker/3:0-13872 [003] .... 11286.007048: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000026522604 nr_records=1
kworker/3:0-13872 [003] .... 11286.015638: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000095ed6fca nr_records=2
kworker/1:2-20434 [001] .... 11286.051230: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000044872ffd nr_records=1
kworker/1:2-20434 [001] .... 11286.059322: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000026522604 nr_records=2
kworker/0:1-20052 [000] .... 11286.095295: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000044872ffd nr_records=2
kworker/0:1-20052 [000] .... 11286.103418: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000cbcf05db nr_records=1
kworker/2:3-14372 [002] .... 11286.135155: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000095ed6fca nr_records=2
kworker/2:3-14372 [002] .... 11286.135198: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000044872ffd nr_records=1
kworker/1:2-20434 [001] .... 11286.155377: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000cbcf05db nr_records=5
kworker/2:3-14372 [002] .... 11286.167181: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000026522604 nr_records=5
kworker/1:2-20434 [001] .... 11286.179202: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000008ef95e14 nr_records=1
kworker/2:3-14372 [002] .... 11286.187398: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000c597d297 nr_records=6
kworker/3:0-13872 [003] .... 11286.187445: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000050bf92e2 nr_records=3
kworker/1:2-20434 [001] .... 11286.198975: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000cbcf05db nr_records=4
kworker/1:2-20434 [001] .... 11286.207203: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000095ed6fca nr_records=4
<snip>

where a page carries only a few records at the moment its memory is
reclaimed. To improve batching and make slot utilization more
efficient, this patch introduces a drain interval that is set to
either KFREE_DRAIN_JIFFIES_MAX or KFREE_DRAIN_JIFFIES_MIN. The
interval is adjusted when a flood is detected: in that case memory
is reclaimed more often, whereas in mostly idle scenarios the
interval stays at its maximum timeout, which improves the
utilization of page slots.
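
For concreteness, the two bounds translate to wall-clock time as
follows. Below is a minimal userspace sketch, not part of the patch;
the interval_ms() helper and the sample HZ values are invented for
illustration:

<snip>
#include <stdio.h>

/* Convert a delay expressed in jiffies to milliseconds. */
static int interval_ms(int jiffies, int hz)
{
	return jiffies * 1000 / hz;
}

int main(void)
{
	const int hz_values[] = { 100, 250, 1000 };

	for (unsigned int i = 0; i < 3; i++) {
		int hz = hz_values[i];
		int max_j = hz;      /* KFREE_DRAIN_JIFFIES_MAX = (HZ)      */
		int min_j = hz / 50; /* KFREE_DRAIN_JIFFIES_MIN = (HZ / 50) */

		printf("HZ=%4d: max = %4d jiffies (%d ms), min = %2d jiffies (%d ms)\n",
		       hz, max_j, interval_ms(max_j, hz),
		       min_j, interval_ms(min_j, hz));
	}

	return 0;
}
<snip>

That is, the maximum interval is one second regardless of HZ, while
the minimum matches the previous fixed 20-millisecond drain period.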

Signed-off-by: Uladzislau Rezki (Sony) <urezki@xxxxxxxxx>
---
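A note for reviewers: schedule_delayed_monitor_work() only ever
shortens a pending timer, it never extends one that is already
armed. Below is a minimal userspace model of that policy; the names,
the fixed HZ and the KVFREE_BULK_MAX_ENTR value (a 4 KiB page of
8-byte slots, matching the "512 records" above) are assumptions made
for illustration, while the kernel variant operates on delayed_work
and jiffies instead:

<snip>
#include <stdio.h>

#define HZ			1000
#define KVFREE_BULK_MAX_ENTR	512		/* records per page (assumed) */
#define DRAIN_MAX		(HZ)		/* mostly idle: drain after 1 s */
#define DRAIN_MIN		(HZ / 50)	/* flood: drain after 20 ms */

/* Pick the drain delay: a flood is assumed once at least a full
 * page worth of records is queued on this CPU. */
static long pick_delay(int queued_records)
{
	return queued_records >= KVFREE_BULK_MAX_ENTR ? DRAIN_MIN : DRAIN_MAX;
}

/* Model of rearming a pending timer: the new delay wins only if it
 * expires sooner than what is already scheduled. */
static long rearm(long delay_left, int queued_records)
{
	long delay = pick_delay(queued_records);

	return delay < delay_left ? delay : delay_left;
}

int main(void)
{
	printf("idle, 10 records queued:    %ld jiffies\n", pick_delay(10));
	printf("flood, 512 records queued:  %ld jiffies\n", pick_delay(512));
	/* A timer armed for 600 jiffies is pulled in when a flood hits. */
	printf("pending 600, flood arrives: %ld jiffies\n", rearm(600, 512));

	return 0;
}
<snip>
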
kernel/rcu/tree.c | 29 +++++++++++++++++++++++++----
1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index fd16c0b46d9e..c02a64995b85 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3249,7 +3249,8 @@ EXPORT_SYMBOL_GPL(call_rcu);
 
 
 /* Maximum number of jiffies to wait before draining a batch. */
-#define KFREE_DRAIN_JIFFIES (HZ / 50)
+#define KFREE_DRAIN_JIFFIES_MAX (HZ)
+#define KFREE_DRAIN_JIFFIES_MIN (HZ / 50)
 #define KFREE_N_BATCHES 2
 #define FREE_N_CHANNELS 2
 
@@ -3510,6 +3511,26 @@ need_offload_krc(struct kfree_rcu_cpu *krcp)
 	return !!krcp->head;
 }
 
+static void
+schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
+{
+	long delay, delay_left;
+
+	delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ?
+		KFREE_DRAIN_JIFFIES_MIN : KFREE_DRAIN_JIFFIES_MAX;
+
+	if (delayed_work_pending(&krcp->monitor_work)) {
+		delay_left = krcp->monitor_work.timer.expires - jiffies;
+
+		if (delay < delay_left)
+			mod_delayed_work(system_wq, &krcp->monitor_work, delay);
+
+		return;
+	}
+
+	queue_delayed_work(system_wq, &krcp->monitor_work, delay);
+}
+
 /*
  * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
  */
@@ -3567,7 +3588,7 @@ static void kfree_rcu_monitor(struct work_struct *work)
 	// work to repeat an attempt. Because previous batches are
 	// still in progress.
 	if (need_offload_krc(krcp))
-		schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+		schedule_delayed_monitor_work(krcp);
 
 	raw_spin_unlock_irqrestore(&krcp->lock, flags);
 }
@@ -3755,7 +3776,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 
 	// Set timer to drain after KFREE_DRAIN_JIFFIES.
 	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
-		schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+		schedule_delayed_monitor_work(krcp);
 
 unlock_return:
 	krc_this_cpu_unlock(krcp, flags);
@@ -3831,7 +3852,7 @@ void __init kfree_rcu_scheduler_running(void)
 
 		raw_spin_lock_irqsave(&krcp->lock, flags);
 		if (need_offload_krc(krcp))
-			schedule_delayed_work_on(cpu, &krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+			schedule_delayed_monitor_work(krcp);
 		raw_spin_unlock_irqrestore(&krcp->lock, flags);
 	}
 }
--
2.30.2