[RFC PATCH 3/4] sysctl: userns: Do not use dynamic memory
From: Alexey Gladkov
Date: Wed Jun 01 2022 - 09:21:14 EST
Dynamic memory allocation is needed to modify .data so that it points
to the per-namespace parameter. The new sysctl API makes it possible to
get rid of the need for such modification.
Signed-off-by: Alexey Gladkov <legion@xxxxxxxxxx>
---
include/linux/user_namespace.h | 6 --
kernel/ucount.c | 116 +++++++++++++--------------------
kernel/user_namespace.c | 10 +--
3 files changed, 46 insertions(+), 86 deletions(-)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 45f09bec02c4..7b134516e5cb 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -95,10 +95,6 @@ struct user_namespace {
struct key *persistent_keyring_register;
#endif
struct work_struct work;
-#ifdef CONFIG_SYSCTL
- struct ctl_table_set set;
- struct ctl_table_header *sysctls;
-#endif
struct ucounts *ucounts;
long ucount_max[UCOUNT_COUNTS];
long rlimit_max[UCOUNT_RLIMIT_COUNTS];
@@ -116,8 +112,6 @@ struct ucounts {
extern struct user_namespace init_user_ns;
extern struct ucounts init_ucounts;
-bool setup_userns_sysctls(struct user_namespace *ns);
-void retire_userns_sysctls(struct user_namespace *ns);
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
diff --git a/kernel/ucount.c b/kernel/ucount.c
index ee8e57fd6f90..4a5072671847 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -7,6 +7,7 @@
#include <linux/hash.h>
#include <linux/kmemleak.h>
#include <linux/user_namespace.h>
+#include <linux/fs.h>
struct ucounts init_ucounts = {
.ns = &init_user_ns,
@@ -26,38 +27,20 @@ static DEFINE_SPINLOCK(ucounts_lock);
#ifdef CONFIG_SYSCTL
-static struct ctl_table_set *
-set_lookup(struct ctl_table_root *root)
-{
- return &current_user_ns()->set;
-}
-
-static int set_is_seen(struct ctl_table_set *set)
-{
- return &current_user_ns()->set == set;
-}
-
-static int set_permissions(struct ctl_table_header *head,
- struct ctl_table *table)
-{
- struct user_namespace *user_ns =
- container_of(head->set, struct user_namespace, set);
- int mode;
-
- /* Allow users with CAP_SYS_RESOURCE unrestrained access */
- if (ns_capable(user_ns, CAP_SYS_RESOURCE))
- mode = (table->mode & S_IRWXU) >> 6;
- else
- /* Allow all others at most read-only access */
- mode = table->mode & S_IROTH;
- return (mode << 6) | (mode << 3) | mode;
-}
-
-static struct ctl_table_root set_root = {
- .lookup = set_lookup,
- .permissions = set_permissions,
+static int user_sys_open(struct ctl_context *ctx, struct inode *inode,
+ struct file *file);
+static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos);
+static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos);
+
+static struct ctl_fops user_sys_fops = {
+ .open = user_sys_open,
+ .read = user_sys_read,
+ .write = user_sys_write,
};
+static long ue_dummy = 0;
static long ue_zero = 0;
static long ue_int_max = INT_MAX;
@@ -66,9 +49,11 @@ static long ue_int_max = INT_MAX;
.procname = name, \
.maxlen = sizeof(long), \
.mode = 0644, \
+ .data = &ue_dummy, \
.proc_handler = proc_doulongvec_minmax, \
.extra1 = &ue_zero, \
.extra2 = &ue_int_max, \
+ .ctl_fops = &user_sys_fops, \
}
static struct ctl_table user_table[] = {
UCOUNT_ENTRY("max_user_namespaces"),
@@ -89,44 +74,43 @@ static struct ctl_table user_table[] = {
#endif
{ }
};
-#endif /* CONFIG_SYSCTL */
-bool setup_userns_sysctls(struct user_namespace *ns)
+static int user_sys_open(struct ctl_context *ctx, struct inode *inode, struct file *file)
{
-#ifdef CONFIG_SYSCTL
- struct ctl_table *tbl;
-
- BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1);
- setup_sysctl_set(&ns->set, &set_root, set_is_seen);
- tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
- if (tbl) {
- int i;
- for (i = 0; i < UCOUNT_COUNTS; i++) {
- tbl[i].data = &ns->ucount_max[i];
- }
- ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
- }
- if (!ns->sysctls) {
- kfree(tbl);
- retire_sysctl_set(&ns->set);
- return false;
- }
-#endif
- return true;
+ /* Allow users with CAP_SYS_RESOURCE unrestrained access */
+ if ((file->f_mode & FMODE_WRITE) &&
+ !ns_capable(file->f_cred->user_ns, CAP_SYS_RESOURCE))
+ return -EPERM;
+ return 0;
}
-void retire_userns_sysctls(struct user_namespace *ns)
+static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos)
{
-#ifdef CONFIG_SYSCTL
- struct ctl_table *tbl;
+ struct ctl_table table = *ctx->table;
+ table.data = &file->f_cred->user_ns->ucount_max[ctx->table - user_table];
+ return table.proc_handler(&table, 0, buffer, lenp, ppos);
+}
- tbl = ns->sysctls->ctl_table_arg;
- unregister_sysctl_table(ns->sysctls);
- retire_sysctl_set(&ns->set);
- kfree(tbl);
-#endif
+static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table table = *ctx->table;
+ table.data = &file->f_cred->user_ns->ucount_max[ctx->table - user_table];
+ return table.proc_handler(&table, 1, buffer, lenp, ppos);
}
+static struct ctl_table user_root_table[] = {
+ {
+ .procname = "user",
+ .mode = 0555,
+ .child = user_table,
+ },
+ {}
+};
+
+#endif /* CONFIG_SYSCTL */
+
static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
{
struct ucounts *ucounts;
@@ -357,17 +341,7 @@ bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigne
static __init int user_namespace_sysctl_init(void)
{
#ifdef CONFIG_SYSCTL
- static struct ctl_table_header *user_header;
- static struct ctl_table empty[1];
- /*
- * It is necessary to register the user directory in the
- * default set so that registrations in the child sets work
- * properly.
- */
- user_header = register_sysctl("user", empty);
- kmemleak_ignore(user_header);
- BUG_ON(!user_header);
- BUG_ON(!setup_userns_sysctls(&init_user_ns));
+ register_sysctl_table(user_root_table);
#endif
hlist_add_ucounts(&init_ucounts);
inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 981bb2d10d83..c0e707bc9a31 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -149,17 +149,10 @@ int create_user_ns(struct cred *new)
INIT_LIST_HEAD(&ns->keyring_name_list);
init_rwsem(&ns->keyring_sem);
#endif
- ret = -ENOMEM;
- if (!setup_userns_sysctls(ns))
- goto fail_keyring;
set_cred_user_ns(new, ns);
return 0;
-fail_keyring:
-#ifdef CONFIG_PERSISTENT_KEYRINGS
- key_put(ns->persistent_keyring_register);
-#endif
- ns_free_inum(&ns->ns);
+
fail_free:
kmem_cache_free(user_ns_cachep, ns);
fail_dec:
@@ -208,7 +201,6 @@ static void free_user_ns(struct work_struct *work)
kfree(ns->projid_map.forward);
kfree(ns->projid_map.reverse);
}
- retire_userns_sysctls(ns);
key_free_user_ns(ns);
ns_free_inum(&ns->ns);
kmem_cache_free(user_ns_cachep, ns);
--
2.33.3