[RESEND PATCH v4 2/5] sysctl: support encoding values directly in the table entry

From: Wen Yang
Date: Tue Oct 08 2024 - 11:16:32 EST


Eric points out: "by turning .extra1 and .extra2 into longs instead of
keeping them as pointers and needing constants to be pointed at somewhere
.. The only people I can see who find a significant benefit by
consolidating all of the constants into one place are people who know how
to stomp kernel memory."

This patch supports encoding values directly in table entries through the
following work:
- extra1/extra2 and min/max are placed in one union to ensure compatibility
with previous code without increasing memory overhead; the remaining
extra1/extra2 users can then be converted and removed gradually.
- two flag bits record which members of the above union are in use:
SYSCTL_FLAG_MIN: 0 means extra1 is used, 1 means min is used.
SYSCTL_FLAG_MAX: 0 means extra2 is used, 1 means max is used.
- since the proc file's mode field only uses 9 bits (0777), two of the
otherwise unused bits (S_ISUID and S_ISGID) are temporarily used to
represent SYSCTL_FLAG_MIN and SYSCTL_FLAG_MAX.
- added some helper macros.

By introducing long min/max to replace void * extra1/extra2 in most cases,
the constants that extra1/extra2 used to point at can be removed, which
saves memory and leaves fewer targets for memory-corruption attacks.

Signed-off-by: Wen Yang <wen.yang@xxxxxxxxx>
Cc: Luis Chamberlain <mcgrof@xxxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Joel Granados <j.granados@xxxxxxxxxxx>
Cc: Eric W. Biederman <ebiederm@xxxxxxxxxxxx>
Cc: Christian Brauner <brauner@xxxxxxxxxx>
Cc: Dave Young <dyoung@xxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
fs/proc/proc_sysctl.c | 8 ++++++--
include/linux/sysctl.h | 24 ++++++++++++++++++++----
2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 90c99eb1abf6..e88d1dca2a80 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -848,8 +848,11 @@ static int proc_sys_getattr(struct mnt_idmap *idmap,
return PTR_ERR(head);

generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
- if (table)
+ if (table) {
stat->mode = (stat->mode & S_IFMT) | table->mode;
+ stat->mode &= ~SYSCTL_FLAG_MIN;
+ stat->mode &= ~SYSCTL_FLAG_MAX;
+ }

sysctl_head_finish(head);
return 0;
@@ -1163,7 +1166,8 @@ static int sysctl_check_table(const char *path, struct ctl_table_header *header)
if (!entry->proc_handler)
err |= sysctl_err(path, entry, "No proc_handler");

- if ((entry->mode & (S_IRUGO|S_IWUGO)) != entry->mode)
+ if ((entry->mode & (S_IRUGO|S_IWUGO|SYSCTL_FLAG_MIN|SYSCTL_FLAG_MAX))
+ != entry->mode)
err |= sysctl_err(path, entry, "bogus .mode 0%o",
entry->mode);
}
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index edfcb22b1e3d..859b016aa76e 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -28,6 +28,7 @@
#include <linux/rbtree.h>
#include <linux/uidgid.h>
#include <uapi/linux/sysctl.h>
+#include <uapi/linux/stat.h>

/* For the /proc/sys support */
struct completion;
@@ -131,6 +132,9 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
#define DEFINE_CTL_TABLE_POLL(name) \
struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)

+#define SYSCTL_FLAG_MIN S_ISUID
+#define SYSCTL_FLAG_MAX S_ISGID
+
/* A sysctl table is an array of struct ctl_table: */
struct ctl_table {
const char *procname; /* Text ID for /proc/sys, or zero */
@@ -139,8 +143,16 @@ struct ctl_table {
umode_t mode;
proc_handler *proc_handler; /* Callback for text formatting */
struct ctl_table_poll *poll;
- void *extra1;
- void *extra2;
+ union {
+ struct {
+ void *extra1;
+ void *extra2;
+ };
+ struct {
+ long min;
+ long max;
+ };
+ };
} __randomize_layout;

struct ctl_node {
@@ -212,9 +224,13 @@ struct ctl_table_root {
#define register_sysctl(path, table) \
register_sysctl_sz(path, table, ARRAY_SIZE(table))

-#define __SYSCTL_RANGE_MIN(_a, _b, _c) (((_a)->extra1) ? *(_b((_a)->extra1)) : (_c))
+#define __SYSCTL_RANGE_EXTRA1(_a, _b, _c) (((_a)->extra1) ? *(_b((_a)->extra1)) : (_c))
+#define __SYSCTL_RANGE_MIN(_a, _b, _c) ((((_a)->mode) & SYSCTL_FLAG_MIN) ? \
+ ((_a)->min) : __SYSCTL_RANGE_EXTRA1(_a, _b, _c))

-#define __SYSCTL_RANGE_MAX(_a, _b, _c) (((_a)->extra2) ? *(_b((_a)->extra2)) : (_c))
+#define __SYSCTL_RANGE_EXTRA2(_a, _b, _c) (((_a)->extra2) ? *(_b((_a)->extra2)) : (_c))
+#define __SYSCTL_RANGE_MAX(_a, _b, _c) ((((_a)->mode) & SYSCTL_FLAG_MAX) ? \
+ ((_a)->max) : __SYSCTL_RANGE_EXTRA2(_a, _b, _c))

static inline unsigned int sysctl_range_min_u8(const struct ctl_table *table)
{
--
2.25.1