[PATCHv3] lib/ratelimit: Lockless ratelimiting

From: Dmitry Safonov
Date: Tue Jul 03 2018 - 18:56:37 EST


Currently ratelimit_state is protected with spin_lock. If the .lock is
taken at the moment of ___ratelimit() call, the message is suppressed to
make ratelimiting robust.

That results in the following issue issue:
CPU0 CPU1
------------------ ------------------
printk_ratelimit() printk_ratelimit()
| |
try_spin_lock() try_spin_lock()
| |
time_is_before_jiffies() return 0; // suppress

So, concurrent call of ___ratelimit() results in silently suppressing
one of the messages, regardless if the limit is reached or not.
And rc->missed is not increased in such case so the issue is covered
from user.

Convert ratelimiting to use atomic counters and drop spin_lock.

Note: That might be unexpected, but with the first interval of messages
storm one can print up to burst*2 messages. So, it doesn't guarantee
that in *any* interval amount of messages is lesser than burst.
But, that differs lightly from previous behavior where one can start
burst=5 interval and print 4 messages on the last milliseconds of
interval + new 5 messages from new interval (totally 9 messages in
lesser than interval value):

msg0 msg1-msg4 msg0-msg4
| | |
| | |
|--o---------------------o-|-----o--------------------|--> (t)
<------->
Lesser than burst

Dropped dev/random patch since v1 version:
lkml.kernel.org/r/<20180510125211.12583-1-dima@xxxxxxxxxx>

Dropped `name' in as it's unused in RATELIMIT_STATE_INIT()

Cc: Andy Shevchenko <andy.shevchenko@xxxxxxxxx>
Cc: Arnd Bergmann <arnd@xxxxxxxx>
Cc: David Airlie <airlied@xxxxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: Jani Nikula <jani.nikula@xxxxxxxxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
Cc: "Theodore Ts'o" <tytso@xxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: intel-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: dri-devel@xxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Dmitry Safonov <dima@xxxxxxxxxx>
---
drivers/char/random.c | 28 ++++++++-----------
drivers/gpu/drm/i915/i915_perf.c | 8 ++++--
fs/btrfs/super.c | 16 +++++------
fs/xfs/scrub/scrub.c | 2 +-
include/linux/ratelimit.h | 31 ++++++++++++---------
kernel/user.c | 2 +-
lib/ratelimit.c | 59 +++++++++++++++++++---------------------
7 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index cd888d4ee605..0be31b3eadab 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -439,10 +439,8 @@ static void _crng_backtrack_protect(struct crng_state *crng,
static void process_random_ready_list(void);
static void _get_random_bytes(void *buf, int nbytes);

-static struct ratelimit_state unseeded_warning =
- RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
-static struct ratelimit_state urandom_warning =
- RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
+static struct ratelimit_state unseeded_warning = RATELIMIT_STATE_INIT(HZ, 3);
+static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT(HZ, 3);

static int ratelimit_disable __read_mostly;

@@ -937,24 +935,22 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
crng->init_time = jiffies;
spin_unlock_irqrestore(&crng->lock, flags);
if (crng == &primary_crng && crng_init < 2) {
+ unsigned int unseeded_miss, urandom_miss;
+
invalidate_batched_entropy();
numa_crng_init();
crng_init = 2;
process_random_ready_list();
wake_up_interruptible(&crng_init_wait);
pr_notice("random: crng init done\n");
- if (unseeded_warning.missed) {
- pr_notice("random: %d get_random_xx warning(s) missed "
- "due to ratelimiting\n",
- unseeded_warning.missed);
- unseeded_warning.missed = 0;
- }
- if (urandom_warning.missed) {
- pr_notice("random: %d urandom warning(s) missed "
- "due to ratelimiting\n",
- urandom_warning.missed);
- urandom_warning.missed = 0;
- }
+ unseeded_miss = atomic_xchg(&unseeded_warning.missed, 0);
+ if (unseeded_miss)
+ pr_notice("random: %u get_random_xx warning(s) missed "
+ "due to ratelimiting\n", unseeded_miss);
+ urandom_miss = atomic_xchg(&urandom_warning.missed, 0);
+ if (urandom_miss)
+ pr_notice("random: %u urandom warning(s) missed "
+ "due to ratelimiting\n", urandom_miss);
}
}

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 019bd2d073ad..dcfba3023547 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1295,6 +1295,7 @@ free_oa_buffer(struct drm_i915_private *i915)
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ unsigned int msg_missed;

BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);

@@ -1317,9 +1318,10 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)

put_oa_config(dev_priv, stream->oa_config);

- if (dev_priv->perf.oa.spurious_report_rs.missed) {
- DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
- dev_priv->perf.oa.spurious_report_rs.missed);
+ msg_missed = atomic_read(&dev_priv->perf.oa.spurious_report_rs.missed);
+ if (msg_missed) {
+ DRM_NOTE("%u spurious OA report notices suppressed due to ratelimiting\n",
+ msg_missed);
}
}

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 81107ad49f3a..ffab6c23bc50 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -177,14 +177,14 @@ static const char * const logtypes[] = {
* messages doesn't cause more important ones to be dropped.
*/
static struct ratelimit_state printk_limits[] = {
- RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
- RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
- RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
- RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
- RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
- RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
- RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
- RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(DEFAULT_RATELIMIT_INTERVAL, 100),
};

void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 58ae76b3a421..8b58b439694a 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -349,7 +349,7 @@ xfs_scrub_experimental_warning(
struct xfs_mount *mp)
{
static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT(
- "xfs_scrub_warning", 86400 * HZ, 1);
+ 86400 * HZ, 1);
ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE);

if (__ratelimit(&scrub_warning))
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 8ddf79e9207a..f9a9386dd869 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -4,7 +4,6 @@

#include <linux/param.h>
#include <linux/sched.h>
-#include <linux/spinlock.h>

#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
#define DEFAULT_RATELIMIT_BURST 10
@@ -13,38 +12,39 @@
#define RATELIMIT_MSG_ON_RELEASE BIT(0)

struct ratelimit_state {
- raw_spinlock_t lock; /* protect the state */
+ atomic_t printed;
+ atomic_t missed;

int interval;
int burst;
- int printed;
- int missed;
unsigned long begin;
unsigned long flags;
};

-#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \
- .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
+#define RATELIMIT_STATE_INIT(interval_init, burst_init) { \
.interval = interval_init, \
.burst = burst_init, \
+ .printed = ATOMIC_INIT(0), \
+ .missed = ATOMIC_INIT(0), \
}

#define RATELIMIT_STATE_INIT_DISABLED \
- RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
+ RATELIMIT_STATE_INIT(0, DEFAULT_RATELIMIT_BURST)

#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \
\
struct ratelimit_state name = \
- RATELIMIT_STATE_INIT(name, interval_init, burst_init) \
+ RATELIMIT_STATE_INIT(interval_init, burst_init)

static inline void ratelimit_state_init(struct ratelimit_state *rs,
int interval, int burst)
{
memset(rs, 0, sizeof(*rs));

- raw_spin_lock_init(&rs->lock);
rs->interval = interval;
rs->burst = burst;
+ atomic_set(&rs->printed, 0);
+ atomic_set(&rs->missed, 0);
}

static inline void ratelimit_default_init(struct ratelimit_state *rs)
@@ -53,16 +53,21 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs)
DEFAULT_RATELIMIT_BURST);
}

+/*
+ * Keeping It Simple: not reenterable and not safe for concurrent
+ * ___ratelimit() call as used only by devkmsg_release().
+ */
static inline void ratelimit_state_exit(struct ratelimit_state *rs)
{
+ int missed;
+
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE))
return;

- if (rs->missed) {
+ missed = atomic_xchg(&rs->missed, 0);
+ if (missed)
pr_warn("%s: %d output lines suppressed due to ratelimiting\n",
- current->comm, rs->missed);
- rs->missed = 0;
- }
+ current->comm, missed);
}

static inline void
diff --git a/kernel/user.c b/kernel/user.c
index 36288d840675..a964f842d8f0 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -101,7 +101,7 @@ struct user_struct root_user = {
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
.uid = GLOBAL_ROOT_UID,
- .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
+ .ratelimit = RATELIMIT_STATE_INIT(0, 0),
};

/*
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index d01f47135239..d9b749d40108 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -13,6 +13,18 @@
#include <linux/jiffies.h>
#include <linux/export.h>

+static void ratelimit_end_interval(struct ratelimit_state *rs, const char *func)
+{
+ rs->begin = jiffies;
+
+ if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+ unsigned int missed = atomic_xchg(&rs->missed, 0);
+
+ if (missed)
+ pr_warn("%s: %u callbacks suppressed\n", func, missed);
+ }
+}
+
/*
* __ratelimit - rate limiting
* @rs: ratelimit_state data
@@ -27,45 +39,30 @@
*/
int ___ratelimit(struct ratelimit_state *rs, const char *func)
{
- unsigned long flags;
- int ret;
-
if (!rs->interval)
return 1;

- /*
- * If we contend on this state's lock then almost
- * by definition we are too busy to print a message,
- * in addition to the one that will be printed by
- * the entity that is holding the lock already:
- */
- if (!raw_spin_trylock_irqsave(&rs->lock, flags))
+ if (unlikely(!rs->burst)) {
+ atomic_add_unless(&rs->missed, 1, -1);
+ if (time_is_before_jiffies(rs->begin + rs->interval))
+ ratelimit_end_interval(rs, func);
+
return 0;
+ }

- if (!rs->begin)
- rs->begin = jiffies;
+ if (atomic_add_unless(&rs->printed, 1, rs->burst))
+ return 1;

if (time_is_before_jiffies(rs->begin + rs->interval)) {
- if (rs->missed) {
- if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
- printk_deferred(KERN_WARNING
- "%s: %d callbacks suppressed\n",
- func, rs->missed);
- rs->missed = 0;
- }
- }
- rs->begin = jiffies;
- rs->printed = 0;
- }
- if (rs->burst && rs->burst > rs->printed) {
- rs->printed++;
- ret = 1;
- } else {
- rs->missed++;
- ret = 0;
+ if (atomic_cmpxchg(&rs->printed, rs->burst, 0))
+ ratelimit_end_interval(rs, func);
}
- raw_spin_unlock_irqrestore(&rs->lock, flags);

- return ret;
+ if (atomic_add_unless(&rs->printed, 1, rs->burst))
+ return 1;
+
+ atomic_add_unless(&rs->missed, 1, -1);
+
+ return 0;
}
EXPORT_SYMBOL(___ratelimit);
--
2.13.6