Re: [PATCH v2 2/9] sysctl: add proper unsigned int support

From: Kees Cook
Date: Mon Feb 13 2017 - 15:20:12 EST


On Fri, Feb 10, 2017 at 4:36 PM, Luis R. Rodriguez <mcgrof@xxxxxxxxxx> wrote:
> Commit e7d316a02f6838 ("sysctl: handle error writing UINT_MAX to u32
> fields") added proc_douintvec() to start help adding support for
> unsigned int, this however was only half the work needed, all these
> issues are present with the current implementation:
>
> o Printing the values shows a negative value, this happens since
> do_proc_dointvec() and this uses proc_put_long()
> o We can easily wrap around the int values: UINT_MAX is 4294967295,
> if we echo in 4294967295 + 1 we end up with 0, using 4294967295 + 2
> we end up with 1.
> o We echo negative values in and they are accepted
> o sysctl_check_table() was never extended for proc_douintvec()
>
> Fix all these issues by adding our own do_proc_douintvec() and adding
> proc_douintvec() to sysctl_check_table().
>
> Historically sysctl proc helpers have supported arrays, due to the
> complexity this adds though we've taken a step back to evaluate array
> users to determine if its worth upkeeping for unsigned int. An
> evaluation using Coccinelle has been done to perform a grammatical
> search to ask ourselves:
>
> o How many sysctl proc_dointvec() (int) users exist which likely
> should be moved over to proc_douintvec() (unsigned int) ?
> Answer: about 8
> - Of these how many are array users ?
> Answer: Probably only 1
> o How many sysctl array users exist ?
> Answer: about 12
>
> This last question gives us an idea just how popular arrays: they
> are not. Array support should probably just be kept for strings.
>
> The identified uint ports are:
>
> drivers/infiniband/core/ucma.c - max_backlog
> drivers/infiniband/core/iwcm.c - default_backlog
> net/core/sysctl_net_core.c - rps_sock_flow_sysctl()
> net/netfilter/nf_conntrack_timestamp.c - nf_conntrack_timestamp -- bool
> net/netfilter/nf_conntrack_acct.c nf_conntrack_acct -- bool
> net/netfilter/nf_conntrack_ecache.c - nf_conntrack_events -- bool
> net/netfilter/nf_conntrack_helper.c - nf_conntrack_helper -- bool
> net/phonet/sysctl.c proc_local_port_range()
>
> The only possible array users is proc_local_port_range() but it does not
> seem worth it to add array support just for this given the range support
> works just as well. Unsigned int support should be desirable more for
> when you *need* more than INT_MAX or using int min/max support then
> does not suffice for your ranges.
>
> If you forget and by mistake happen to register an unsigned int proc entry
> with an array, the driver will fail and you will get something as follows:
>
> sysctl table check failed: debug/test_sysctl//uint_0002 array now allowed
> CPU: 2 PID: 1342 Comm: modprobe Tainted: G W E <etc>
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS <etc>
> Call Trace:
> dump_stack+0x63/0x81
> __register_sysctl_table+0x350/0x650
> ? kmem_cache_alloc_trace+0x107/0x240
> __register_sysctl_paths+0x1b3/0x1e0
> ? 0xffffffffc005f000
> register_sysctl_table+0x1f/0x30
> test_sysctl_init+0x10/0x1000 [test_sysctl]
> do_one_initcall+0x52/0x1a0
> ? kmem_cache_alloc_trace+0x107/0x240
> do_init_module+0x5f/0x200
> load_module+0x1867/0x1bd0
> ? __symbol_put+0x60/0x60
> SYSC_finit_module+0xdf/0x110
> SyS_finit_module+0xe/0x10
> entry_SYSCALL_64_fastpath+0x1e/0xad
> RIP: 0033:0x7f042b22d119
> <etc>
>
> Cc: Subash Abhinov Kasiviswanathan <subashab@xxxxxxxxxxxxxx>
> Cc: Heinrich Schuchardt <xypron.glpk@xxxxxx>
> Cc: Kees Cook <keescook@xxxxxxxxxxxx>
> Cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields")
> Signed-off-by: Luis R. Rodriguez <mcgrof@xxxxxxxxxx>
> ---
> fs/proc/proc_sysctl.c | 15 +++++
> kernel/sysctl.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 170 insertions(+), 6 deletions(-)
>
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index d22ee738d2eb..73696a73a1ec 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -1031,6 +1031,18 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
> return -EINVAL;
> }
>
> +static int sysctl_check_table_array(const char *path, struct ctl_table *table)
> +{
> + int err = 0;
> +
> + if (table->proc_handler == proc_douintvec) {

Should this be inverted? i.e. explicitly allow proc_handlers instead
of only rejecting douintvec?

> + if (table->maxlen != sizeof(unsigned int))
> + err |= sysctl_err(path, table, "array now allowed");
> + }
> +
> + return err;
> +}
> +
> static int sysctl_check_table(const char *path, struct ctl_table *table)
> {
> int err = 0;
> @@ -1040,6 +1052,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
>
> if ((table->proc_handler == proc_dostring) ||
> (table->proc_handler == proc_dointvec) ||
> + (table->proc_handler == proc_douintvec) ||
> (table->proc_handler == proc_dointvec_minmax) ||
> (table->proc_handler == proc_dointvec_jiffies) ||
> (table->proc_handler == proc_dointvec_userhz_jiffies) ||
> @@ -1050,6 +1063,8 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
> err |= sysctl_err(path, table, "No data");
> if (!table->maxlen)
> err |= sysctl_err(path, table, "No maxlen");
> + else
> + err |= sysctl_check_table_array(path, table);
> }
> if (!table->proc_handler)
> err |= sysctl_err(path, table, "No proc_handler");
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 1aea594a54db..493bc05e546a 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2125,12 +2125,12 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
> return 0;
> }
>
> -static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
> - int *valp,
> - int write, void *data)
> +static int do_proc_douintvec_conv(unsigned long *lvalp,
> + unsigned int *valp,
> + int write, void *data)
> {
> if (write) {
> - if (*negp)
> + if (*lvalp > UINT_MAX)
> return -EINVAL;
> *valp = *lvalp;
> } else {
> @@ -2243,6 +2243,155 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
> buffer, lenp, ppos, conv, data);
> }
>
> +static int do_proc_douintvec_w(unsigned int *tbl_data,
> + struct ctl_table *table,
> + void __user *buffer,
> + size_t *lenp, loff_t *ppos,
> + int (*conv)(unsigned long *lvalp,
> + unsigned int *valp,
> + int write, void *data),
> + void *data)
> +{
> + unsigned long lval;
> + int err = 0;
> + size_t left;
> + bool neg;
> + char *kbuf = NULL, *p;
> +
> + left = *lenp;
> +
> + if (*ppos) {
> + switch (sysctl_writes_strict) {
> + case SYSCTL_WRITES_STRICT:
> + goto bail_early;
> + case SYSCTL_WRITES_WARN:
> + warn_sysctl_write(table);
> + break;
> + default:
> + break;
> + }
> + }

I wonder if this SYSCTL_WRITES_* test copy/pasting needs to be a function?

> +
> + if (left > PAGE_SIZE - 1)
> + left = PAGE_SIZE - 1;
> +
> + p = kbuf = memdup_user_nul(buffer, left);
> + if (IS_ERR(kbuf))
> + return -EINVAL;
> +
> + left -= proc_skip_spaces(&p);
> + if (!left) {
> + err = -EINVAL;
> + goto out_free;
> + }
> +
> + err = proc_get_long(&p, &left, &lval, &neg,
> + proc_wspace_sep,
> + sizeof(proc_wspace_sep), NULL);
> + if (err || neg) {
> + err = -EINVAL;
> + goto out_free;
> + }
> +
> + if (conv(&lval, tbl_data, 1, data)) {
> + err = -EINVAL;
> + goto out_free;
> + }
> +
> + if (!err && left)
> + left -= proc_skip_spaces(&p);
> +
> +out_free:
> + kfree(kbuf);
> + if (err)
> + return -EINVAL;
> +
> + return 0;
> +
> + /* This is in keeping with old __do_proc_dointvec() */
> +bail_early:
> + *ppos += *lenp;
> + return err;
> +}
> +
> +static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
> + size_t *lenp, loff_t *ppos,
> + int (*conv)(unsigned long *lvalp,
> + unsigned int *valp,
> + int write, void *data),
> + void *data)
> +{
> + unsigned long lval;
> + int err = 0;
> + size_t left;
> +
> + left = *lenp;
> +
> + if (conv(&lval, tbl_data, 0, data)) {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + err = proc_put_long(&buffer, &left, lval, false);
> + if (err || !left)
> + goto out;
> +
> + err = proc_put_char(&buffer, &left, '\n');
> +
> +out:
> + *lenp -= left;
> + *ppos += *lenp;
> +
> + return err;
> +}
> +
> +static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
> + int write, void __user *buffer,
> + size_t *lenp, loff_t *ppos,
> + int (*conv)(unsigned long *lvalp,
> + unsigned int *valp,
> + int write, void *data),
> + void *data)
> +{
> + unsigned int *i, vleft;
> +
> + if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
> + *lenp = 0;
> + return 0;
> + }
> +
> + i = (unsigned int *) tbl_data;
> + vleft = table->maxlen / sizeof(*i);
> +
> + /*
> + * Arrays are not supported, keep this simple. *Do not* add
> + * support for them.
> + */
> + if (vleft != 1) {
> + *lenp = 0;
> + return -EINVAL;
> + }
> +
> + if (!conv)
> + conv = do_proc_douintvec_conv;
> +
> + if (write)
> + return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
> + conv, data);
> + return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
> +}

I like the split of read/write. That makes things easier to review.

> +
> +static int do_proc_douintvec(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp, loff_t *ppos,
> + int (*conv)(unsigned long *lvalp,
> + unsigned int *valp,
> + int write, void *data),
> + void *data)
> +{
> + return __do_proc_douintvec(table->data, table, write,
> + buffer, lenp, ppos, conv, data);
> +}
> +
> /**
> * proc_dointvec - read a vector of integers
> * @table: the sysctl table
> @@ -2278,8 +2427,8 @@ int proc_dointvec(struct ctl_table *table, int write,
> int proc_douintvec(struct ctl_table *table, int write,
> void __user *buffer, size_t *lenp, loff_t *ppos)
> {
> - return do_proc_dointvec(table, write, buffer, lenp, ppos,
> - do_proc_douintvec_conv, NULL);
> + return do_proc_douintvec(table, write, buffer, lenp, ppos,
> + do_proc_douintvec_conv, NULL);
> }
>
> /*
> --
> 2.11.0
>

-Kees

--
Kees Cook
Pixel Security