Re: [PATCH 2/2] module: add support to avoid duplicates early on load

From: Linus Torvalds
Date: Mon May 29 2023 - 21:58:15 EST


On Mon, May 29, 2023 at 11:18 AM Johan Hovold <johan@xxxxxxxxxx> wrote:
>
> I took a closer look at some of the modules that failed to load and
> noticed a pattern in that they have dependencies that are needed by more
> than one device.

Ok, this is a "maybe something like this" RFC series of two patches -
one trivial one to re-organize things a bit so that we can then do the
real one which uses a filter based on the inode pointer to return an
"idempotent return value" for module loads that share the same inode.

It's entirely untested, and since I'm on the road I'm going to not
really be able to test it. It compiles for me, and the code looks
fairly straightforward, but it's probably buggy.

It's very loosely based on Luis' attempt, but it
(a) is internal to module loading
(b) uses a reliable cookie
(c) doesn't leave the cookie around randomly for later
(d) has seen absolutely no testing

Put another way: if somebody wants to play with this, please treat it
as a starting point, not the final thing. You might need to debug
things, and fix silly mistakes.

The idea is to just have a simple hash list of currently executing
module loads, protected by a trivial spinlock. Every module loader
adds itself to the right hash list, and if they were the *first* one
(ie no other pending module loads for that inode), will actually do
the module load.

Everybody who *isn't* the first one will just wait for completion and
return the same error code that the first one returned.

This is technically bogus. The first one might fail due to arguments.
So the cookie shouldn't be just the inode, it should be the inode and
a hash of the arguments or something like that. But it is what it is,
and apart from possible show-stopper bugs this is no worse than the
failed "exclusive write deny" attempt. IOW - maybe worth trying?

And if *that* didn't sell people on this patch series, I don't know
what will. I should be in marketing! Two drink minimums, here I come!

Linus
From b7d19af1b2a3ca9f789df9c04147576fca7c5b8f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Date: Mon, 29 May 2023 20:55:13 -0400
Subject: [PATCH 1/2] module: split up 'finit_module()' into
init_module_from_file() helper

This will simplify the next step, where we can then key off the inode to
do one idempotent module load.

Let's do the obvious re-organization in one step, and then the new code
in another.

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---
kernel/module/main.c | 42 +++++++++++++++++++++++++++---------------
1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/kernel/module/main.c b/kernel/module/main.c
index 044aa2c9e3cb..427bffa2844f 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -3057,26 +3057,16 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
return load_module(&info, uargs, 0);
}

-SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
+static int init_module_from_file(struct file *f, const char __user * uargs, int flags)
{
struct load_info info = { };
void *buf = NULL;
int len;
- int err;

- err = may_init_module();
- if (err)
- return err;
+ if (!f || !(f->f_mode & FMODE_READ))
+ return -EBADF;

- pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);
-
- if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
- |MODULE_INIT_IGNORE_VERMAGIC
- |MODULE_INIT_COMPRESSED_FILE))
- return -EINVAL;
-
- len = kernel_read_file_from_fd(fd, 0, &buf, INT_MAX, NULL,
- READING_MODULE);
+ len = kernel_read_file(f, 0, &buf, INT_MAX, NULL, READING_MODULE);
if (len < 0) {
mod_stat_inc(&failed_kreads);
mod_stat_add_long(len, &invalid_kread_bytes);
@@ -3084,7 +3074,7 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
}

if (flags & MODULE_INIT_COMPRESSED_FILE) {
- err = module_decompress(&info, buf, len);
+ int err = module_decompress(&info, buf, len);
vfree(buf); /* compressed data is no longer needed */
if (err) {
mod_stat_inc(&failed_decompress);
@@ -3099,6 +3089,28 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
return load_module(&info, uargs, flags);
}

+SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
+{
+ int err;
+ struct fd f;
+
+ err = may_init_module();
+ if (err)
+ return err;
+
+ pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);
+
+ if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
+ |MODULE_INIT_IGNORE_VERMAGIC
+ |MODULE_INIT_COMPRESSED_FILE))
+ return -EINVAL;
+
+ f = fdget(fd);
+ err = init_module_from_file(f.file, uargs, flags);
+ fdput(f);
+ return err;
+}
+
/* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */
char *module_flags(struct module *mod, char *buf, bool show_state)
{
--
2.40.0.rc1.2.gd15644fe02

From c844099ab4d3032424b4bf8720e761fbccf88ea5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Date: Mon, 29 May 2023 21:39:51 -0400
Subject: [PATCH 2/2] modules: catch concurrent module loads, take two

This treats concurrent module loads from a file as "idempotent" in the
inode, ie if one module load is ongoing, we don't start a new one, but
instead just expect the first one to complete and return the same return
value as it did.

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---
kernel/module/main.c | 73 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/kernel/module/main.c b/kernel/module/main.c
index 427bffa2844f..82b0dcc1fe77 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -3057,15 +3057,82 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
return load_module(&info, uargs, 0);
}

+struct idempotent {
+ const void *cookie;
+ struct hlist_node entry;
+ struct completion complete;
+ int ret;
+};
+
+#define IDEM_HASH_BITS 8
+static struct hlist_head idem_hash[1 << IDEM_HASH_BITS];
+static struct spinlock idem_lock;
+
+static bool idempotent(struct idempotent *u, const void *cookie)
+{
+ int hash = hash_ptr(cookie, IDEM_HASH_BITS);
+ struct hlist_head *head = idem_hash + hash;
+ struct idempotent *existing;
+ bool first;
+
+ u->ret = 0;
+ u->cookie = cookie;
+ init_completion(&u->complete);
+
+ spin_lock(&idem_lock);
+ first = true;
+ hlist_for_each_entry(existing, head, entry) {
+ if (existing->cookie != cookie)
+ continue;
+ first = false;
+ break;
+ }
+ hlist_add_head(&u->entry, idem_hash+hash);
+ spin_unlock(&idem_lock);
+
+ return !first;
+}
+
+/*
+ * We were the first one with 'cookie' on the list, and we ended
+ * up completing the operation. We now need to walk the list,
+ * remove everybody - which includes ourselfs - fill in the return
+ * value, and then complete the operation.
+ */
+static void idempotent_complete(struct idempotent *u, int ret)
+{
+ const void *cookie = u->cookie;
+ int hash = hash_ptr(cookie, IDEM_HASH_BITS);
+ struct hlist_head *head = idem_hash + hash;
+ struct hlist_node *next;
+ struct idempotent *pos;
+
+ spin_lock(&idem_lock);
+ hlist_for_each_entry_safe(pos, next, head, entry) {
+ if (pos->cookie != cookie)
+ continue;
+ hlist_del(&pos->entry);
+ pos->ret = ret;
+ complete(&pos->complete);
+ }
+ spin_unlock(&idem_lock);
+}
+
static int init_module_from_file(struct file *f, const char __user * uargs, int flags)
{
+ struct idempotent idem;
struct load_info info = { };
void *buf = NULL;
- int len;
+ int len, ret;

if (!f || !(f->f_mode & FMODE_READ))
return -EBADF;

+ if (idempotent(&idem, file_inode(f))) {
+ wait_for_completion(&idem.complete);
+ return idem.ret;
+ }
+
len = kernel_read_file(f, 0, &buf, INT_MAX, NULL, READING_MODULE);
if (len < 0) {
mod_stat_inc(&failed_kreads);
@@ -3086,7 +3153,9 @@ static int init_module_from_file(struct file *f, const char __user * uargs, int
info.len = len;
}

- return load_module(&info, uargs, flags);
+ ret = load_module(&info, uargs, flags);
+ idempotent_complete(&idem, ret);
+ return ret;
}

SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
--
2.40.0.rc1.2.gd15644fe02