[PATCH] binfmt_misc: add two-steps registration (opt-in)
From: Domenico Andreoli
Date: Tue Mar 01 2022 - 08:28:45 EST
From: Domenico Andreoli <domenico.andreoli@xxxxxxxxx>
Experimenting with new interpreter configurations can lead to annoying
failures, when the system is left unable to load ELF binaries power
cycling is the only way to get it back operational.
This patch tries to mitigate such conditions by adding an opt-in
two-steps registration.
A new optional field is added to the configuration string, it's an
expiration interval for the newly added interpreter. If the user is
not able to confirm in time, possibly because the system is broken,
the new interpreter is automatically disabled.
Signed-off-by: Domenico Andreoli <domenico.andreoli@xxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
---
Documentation/admin-guide/binfmt-misc.rst | 12 +++
fs/binfmt_misc.c | 112 ++++++++++++++++++++++++++++--
2 files changed, 113 insertions(+), 11 deletions(-)
Index: b/Documentation/admin-guide/binfmt-misc.rst
===================================================================
--- a/Documentation/admin-guide/binfmt-misc.rst
+++ b/Documentation/admin-guide/binfmt-misc.rst
@@ -16,8 +16,8 @@ First you must mount binfmt_misc::
mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc
To actually register a new binary type, you have to set up a string looking like
-``:name:type:offset:magic:mask:interpreter:flags`` (where you can choose the
-``:`` upon your needs) and echo it to ``/proc/sys/fs/binfmt_misc/register``.
+``:name:type:offset:magic:mask:interpreter:flags:timeout`` (where you can choose
+the ``:`` upon your needs) and echo it to ``/proc/sys/fs/binfmt_misc/register``.
Here is what the fields mean:
@@ -88,6 +88,14 @@ Here is what the fields mean:
emulation is installed and uses the opened image to spawn the
emulator, meaning it is always available once installed,
regardless of how the environment changes.
+- ``timeout``
+ is an optional field; the newly added interpreter is automatically
+ disabled after the specified number of seconds. To cancel such
+ count down, cat or echo something to ``/proc/.../the_name``. This
+ registration in two steps allows recovering a system left unusable
+ by some wrong configuration. A timeout of 0 seconds effectively adds
+ a disabled interpreter. Values smaller than 0 or bigger than 120
+ are invalid.
There are some restrictions:
Index: b/fs/binfmt_misc.c
===================================================================
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,8 @@
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
#include "internal.h"
@@ -49,6 +51,8 @@ enum {Enabled, Magic};
#define MISC_FMT_CREDENTIALS (1 << 29)
#define MISC_FMT_OPEN_FILE (1 << 28)
+struct node_timer;
+
typedef struct {
struct list_head list;
unsigned long flags; /* type, status, etc. */
@@ -60,8 +64,15 @@ typedef struct {
char *name;
struct dentry *dentry;
struct file *interp_file;
+ struct node_timer *auto_disable;
+ spinlock_t auto_disable_lock;
} Node;
+struct node_timer {
+ struct timer_list timer;
+ Node *node;
+};
+
static DEFINE_RWLOCK(entries_lock);
static struct file_system_type bm_fs_type;
static struct vfsmount *bm_mnt;
@@ -69,19 +80,30 @@ static int entry_count;
/*
* Max length of the register string. Determined by:
- * - 7 delimiters
- * - name: ~50 bytes
- * - type: 1 byte
- * - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE)
- * - magic: 128 bytes (512 in escaped form)
- * - mask: 128 bytes (512 in escaped form)
- * - interp: ~50 bytes
- * - flags: 5 bytes
+ * - 8 delimiters
+ * - name: ~50 bytes
+ * - type: 1 byte
+ * - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE)
+ * - magic: 128 bytes (512 in escaped form)
+ * - mask: 128 bytes (512 in escaped form)
+ * - interp: ~50 bytes
+ * - flags: 5 bytes
+ * - timeout: 3 bytes
* Round that up a bit, and then back off to hold the internal data
* (like struct Node).
*/
#define MAX_REGISTER_LENGTH 1920
+#define MAX_AUTO_DISABLE_TIMEOUT 120
+
+static void auto_disable_timer_fn(struct timer_list *t)
+{
+ struct node_timer *timer = container_of(t, struct node_timer, timer);
+
+ clear_bit(Enabled, &timer->node->flags);
+ pr_info("%s: auto-disabled\n", timer->node->name);
+}
+
/*
* Check if we support the binfmt
* if we do, return the node, else NULL
@@ -266,6 +288,41 @@ static char *check_special_flags(char *s
return p;
}
+static char *setup_auto_disable(char *p, char *endp, Node *e)
+{
+ unsigned int timeout;
+ char buf[4] = {0};
+
+ while (endp[-1] == '\n')
+ endp--;
+ if (p >= endp || *p != ':' || ++p == endp)
+ return p;
+
+ endp = min(endp, p + sizeof(buf) - 1);
+ memcpy(buf, p, (size_t) (endp - p));
+
+ if (kstrtouint(buf, 10, &timeout) || timeout > MAX_AUTO_DISABLE_TIMEOUT) {
+ pr_info("%s: invalid timeout: %s\n", e->name, buf);
+ return p;
+ }
+
+ if (timeout == 0) {
+ e->flags &= ~(1 << Enabled);
+ return endp;
+ }
+
+ e->auto_disable = kmalloc(sizeof(struct node_timer), GFP_KERNEL);
+ if (!e->auto_disable)
+ return ERR_PTR(-ENOMEM);
+
+ pr_info("%s: auto-disable in %u seconds\n", e->name, timeout);
+
+ timer_setup(&e->auto_disable->timer, auto_disable_timer_fn, 0);
+ e->auto_disable->timer.expires = jiffies + timeout * HZ;
+ e->auto_disable->node = e;
+ return endp;
+}
+
/*
* This registers a new binary format, it recognises the syntax
* ':name:type:offset:magic:mask:interpreter:flags'
@@ -273,7 +330,7 @@ static char *check_special_flags(char *s
*/
static Node *create_entry(const char __user *buffer, size_t count)
{
- Node *e;
+ Node *e = NULL;
int memsize, err;
char *buf, *p;
char del;
@@ -297,6 +354,8 @@ static Node *create_entry(const char __u
if (copy_from_user(buf, buffer, count))
goto efault;
+ spin_lock_init(&e->auto_disable_lock);
+
del = *p++; /* delimeter */
pr_debug("register: delim: %#x {%c}\n", del, del);
@@ -454,6 +513,14 @@ static Node *create_entry(const char __u
/* Parse the 'flags' field. */
p = check_special_flags(p, e);
+
+ /* Parse the 'timeout' field and init the auto-disable timer. */
+ p = setup_auto_disable(p, buf + count, e);
+ if (IS_ERR(p)) {
+ err = PTR_ERR(p);
+ goto out;
+ }
+
if (*p == '\n')
p++;
if (p != buf + count)
@@ -462,12 +529,15 @@ static Node *create_entry(const char __u
return e;
out:
+ kfree(e);
return ERR_PTR(err);
efault:
kfree(e);
return ERR_PTR(-EFAULT);
einval:
+ if (e)
+ kfree(e->auto_disable);
kfree(e);
return ERR_PTR(-EINVAL);
}
@@ -499,6 +569,21 @@ static int parse_command(const char __us
/* generic stuff */
+static void cancel_auto_disable(Node *e)
+{
+ struct node_timer *auto_disable = NULL;
+
+ spin_lock(&e->auto_disable_lock);
+ swap(e->auto_disable, auto_disable);
+ spin_unlock(&e->auto_disable_lock);
+
+ if (auto_disable) {
+ if (del_timer_sync(&auto_disable->timer))
+ pr_info("%s: cancelled auto-disable\n", e->name);
+ kfree(auto_disable);
+ }
+}
+
static void entry_status(Node *e, char *page)
{
char *dp = page;
@@ -559,6 +644,8 @@ static void bm_evict_inode(struct inode
if (e && e->flags & MISC_FMT_OPEN_FILE)
filp_close(e->interp_file, NULL);
+ if (e)
+ cancel_auto_disable(e);
clear_inode(inode);
kfree(e);
@@ -588,6 +675,8 @@ bm_entry_read(struct file *file, char __
ssize_t res;
char *page;
+ cancel_auto_disable(e);
+
page = (char *) __get_free_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
@@ -607,6 +696,8 @@ static ssize_t bm_entry_write(struct fil
Node *e = file_inode(file)->i_private;
int res = parse_command(buffer, count);
+ cancel_auto_disable(e);
+
switch (res) {
case 1:
/* Disable this handler. */
@@ -699,6 +790,9 @@ static ssize_t bm_register_write(struct
list_add(&e->list, &entries);
write_unlock(&entries_lock);
+ if (e->auto_disable)
+ add_timer(&e->auto_disable->timer);
+
err = 0;
out2:
dput(dentry);