Re: [RFC][PATCH] printk: increase devkmsg write() ratelimit

From: Sergey Senozhatsky
Date: Thu Dec 20 2018 - 06:35:44 EST


On (12/18/18 12:37), Steven Rostedt wrote:
> >
> > Again, complain to system-doofus for printing so much crap to somewhere
> > it should not print to begin with.
>
> I've been saying that it would be good to make the kmsg be a separate
> buffer that just gets interleaved with the kernel buffer.

Hmm, this is interesting.

> Userspace processes should never be able to overwrite messages
> from the kernel.

I would agree.

It's a bit funny that kernel people first take the patch in and then,
when user-space begins to use the feature, start to object and ratelimit.

By the way, systemd seems to be OK with alternative logging targets

/etc/systemd/system.conf

---
[Manager]
#LogLevel=info
LogTarget=syslog console journal
---

Everything looks fine with read-only devkmsg (running the patched
kernel). "journalctl -f -b" gives a nice system log:

...
kernel: r8169 0000:04:00.0 enp4s0: renamed from eth0
kernel: snd_hda_codec_realtek hdaudioC0D0: Line=0x1a
systemd-udevd[180]: link_config: autonegotiation is unset or enabled, the speed and duplex are not writable.
systemd[1]: Started Flush Journal to Persistent Storage.
kernel: input: HDA Intel PCH Line as /devices/pci0000:00/0000:00:1f.3/sound/card0/input7
...


Given how often systemd hits ratelimits (I googled some bug reports),
I wonder if systemd people are still interested in /dev/kmsg logging
at all.


---

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 7b4e4de778e4..6d35115c5629 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -875,7 +875,7 @@ static const struct memdev {
[8] = { "random", 0666, &random_fops, 0 },
[9] = { "urandom", 0666, &urandom_fops, 0 },
#ifdef CONFIG_PRINTK
- [11] = { "kmsg", 0644, &kmsg_fops, 0 },
+ [11] = { "kmsg", 0444, &kmsg_fops, 0 },
#endif
};

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 829fe6fb098a..48c4ccac9fce 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -770,7 +770,6 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
struct devkmsg_user {
u64 seq;
u32 idx;
- struct ratelimit_state rs;
struct mutex lock;
char buf[CONSOLE_EXT_LOG_MAX];
};
@@ -788,69 +787,6 @@ int devkmsg_emit(int facility, int level, const char *fmt, ...)
return r;
}

-static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
-{
- char *buf, *line;
- int level = default_message_loglevel;
- int facility = 1; /* LOG_USER */
- struct file *file = iocb->ki_filp;
- struct devkmsg_user *user = file->private_data;
- size_t len = iov_iter_count(from);
- ssize_t ret = len;
-
- if (!user || len > LOG_LINE_MAX)
- return -EINVAL;
-
- /* Ignore when user logging is disabled. */
- if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
- return len;
-
- /* Ratelimit when not explicitly enabled. */
- if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) {
- if (!___ratelimit(&user->rs, current->comm))
- return ret;
- }
-
- buf = kmalloc(len+1, GFP_KERNEL);
- if (buf == NULL)
- return -ENOMEM;
-
- buf[len] = '\0';
- if (!copy_from_iter_full(buf, len, from)) {
- kfree(buf);
- return -EFAULT;
- }
-
- /*
- * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace
- * the decimal value represents 32bit, the lower 3 bit are the log
- * level, the rest are the log facility.
- *
- * If no prefix or no userspace facility is specified, we
- * enforce LOG_USER, to be able to reliably distinguish
- * kernel-generated messages from userspace-injected ones.
- */
- line = buf;
- if (line[0] == '<') {
- char *endp = NULL;
- unsigned int u;
-
- u = simple_strtoul(line + 1, &endp, 10);
- if (endp && endp[0] == '>') {
- level = LOG_LEVEL(u);
- if (LOG_FACILITY(u) != 0)
- facility = LOG_FACILITY(u);
- endp++;
- len -= endp - line;
- line = endp;
- }
- }
-
- devkmsg_emit(facility, level, "%s", line);
- kfree(buf);
- return ret;
-}
-
static ssize_t devkmsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -998,9 +934,6 @@ static int devkmsg_open(struct inode *inode, struct file *file)
if (!user)
return -ENOMEM;

- ratelimit_default_init(&user->rs);
- ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE);
-
mutex_init(&user->lock);

logbuf_lock_irq();
@@ -1019,8 +952,6 @@ static int devkmsg_release(struct inode *inode, struct file *file)
if (!user)
return 0;

- ratelimit_state_exit(&user->rs);
-
mutex_destroy(&user->lock);
kfree(user);
return 0;
@@ -1029,7 +960,6 @@ static int devkmsg_release(struct inode *inode, struct file *file)
const struct file_operations kmsg_fops = {
.open = devkmsg_open,
.read = devkmsg_read,
- .write_iter = devkmsg_write,
.llseek = devkmsg_llseek,
.poll = devkmsg_poll,
.release = devkmsg_release,