[PATCH 1/2] ratelimit: Reduce ratelimit's false-positive (alternative)

From: Petr Mladek
Date: Tue Apr 08 2025 - 10:00:22 EST


---
include/linux/ratelimit.h | 2 +-
include/linux/ratelimit_types.h | 2 +-
lib/ratelimit.c | 51 ++++++++++++++++++++++++---------
3 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index adfec24061d1..7aaad158ee37 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -44,7 +44,7 @@ static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, in
raw_spin_lock_irqsave(&rs->lock, flags);
rs->interval = interval_init;
rs->flags &= ~RATELIMIT_INITIALIZED;
- rs->printed = 0;
+ atomic_set(&rs->rs_n_left, rs->burst);
ratelimit_state_reset_miss(rs);
raw_spin_unlock_irqrestore(&rs->lock, flags);
}
diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
index ef6711b6b229..b19c4354540a 100644
--- a/include/linux/ratelimit_types.h
+++ b/include/linux/ratelimit_types.h
@@ -18,7 +18,7 @@ struct ratelimit_state {

int interval;
int burst;
- int printed;
+ atomic_t rs_n_left;
atomic_t missed;
unsigned int flags;
unsigned long begin;
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index bd6e3b429e33..90c9fe57eb42 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -39,12 +39,22 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
return 1;

/*
- * If we contend on this state's lock then almost
- * by definition we are too busy to print a message,
- * in addition to the one that will be printed by
- * the entity that is holding the lock already:
+ * If we contend on this state's lock then just check if
+ * the current burst is used or not. It might cause
+ * false positive when we are past the interval and
+ * the current lock owner is just about to reset it.
*/
if (!raw_spin_trylock_irqsave(&rs->lock, flags)) {
+ unsigned int rs_flags = READ_ONCE(rs->flags);
+
+ if (rs_flags & RATELIMIT_INITIALIZED && burst) {
+ int n_left;
+
+ n_left = atomic_dec_return(&rs->rs_n_left);
+ if (n_left >= 0)
+ return 1;
+ }
+
ratelimit_state_inc_miss(rs);
return 0;
}
@@ -52,27 +62,42 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
if (!(rs->flags & RATELIMIT_INITIALIZED)) {
rs->begin = jiffies;
rs->flags |= RATELIMIT_INITIALIZED;
+ atomic_set(&rs->rs_n_left, rs->burst);
}

if (time_is_before_jiffies(rs->begin + interval)) {
- int m = ratelimit_state_reset_miss(rs);
+ int m;

+ /*
+ * Reset rs_n_left ASAP to reduce false positives
+ * in parallel calls, see above.
+ */
+ atomic_set(&rs->rs_n_left, rs->burst);
+ rs->begin = jiffies;
+
+ m = ratelimit_state_reset_miss(rs);
if (m) {
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
printk_deferred(KERN_WARNING
"%s: %d callbacks suppressed\n", func, m);
}
}
- rs->begin = jiffies;
- rs->printed = 0;
}
- if (burst && burst > rs->printed) {
- rs->printed++;
- ret = 1;
- } else {
- ratelimit_state_inc_miss(rs);
- ret = 0;
+ if (burst) {
+ int n_left;
+
+ /* The burst might have been taken by a parallel call. */
+ n_left = atomic_dec_return(&rs->rs_n_left);
+ if (n_left >= 0) {
+ ret = 1;
+ goto unlock_ret;
+ }
}
+
+ ratelimit_state_inc_miss(rs);
+ ret = 0;
+
+unlock_ret:
raw_spin_unlock_irqrestore(&rs->lock, flags);

return ret;
--
2.49.0

It is still racy. But I think that all versions were somehow racy.

BTW: The new selftest fails with it. There seems to be two kind of
problems:

1. Timing. The test seems to be a bit sensitive on timing.
It helped me to do not go that close to the "interval":

diff --git a/lib/test_ratelimit.c b/lib/test_ratelimit.c
index 3d6db9be6be2..539aa968e858 100644
--- a/lib/test_ratelimit.c
+++ b/lib/test_ratelimit.c
@@ -24,19 +24,19 @@ static void test_ratelimit_smoke(struct kunit *test)
test_ratelimited(test, true);
test_ratelimited(test, false);

- schedule_timeout_idle(TESTRL_INTERVAL - 20);
+ schedule_timeout_idle(TESTRL_INTERVAL - 40);
test_ratelimited(test, false);

- schedule_timeout_idle(30);
+ schedule_timeout_idle(50);
test_ratelimited(test, true);

schedule_timeout_idle(2 * TESTRL_INTERVAL);
test_ratelimited(test, true);
test_ratelimited(test, true);

- schedule_timeout_idle(TESTRL_INTERVAL - 20);
+ schedule_timeout_idle(TESTRL_INTERVAL - 40);
test_ratelimited(test, true);
- schedule_timeout_idle(30);
+ schedule_timeout_idle(50);
test_ratelimited(test, true);
test_ratelimited(test, true);
test_ratelimited(test, true);


2. It still failed on burst = 0 test:

// Test disabling.
testrl.burst = 0;
test_ratelimited(test, true);

I haven't tried the selftest with the original code. But it looks
to me like it always returned "false" here as well. But I agree
that it would make more sense to return "true" here.

Best Regards,
Petr