Re: [PATCH RFC] coredump: virtualize core dump path configuration

From: Jann Horn
Date: Sat Feb 25 2017 - 11:14:48 EST


On Sat, Feb 25, 2017 at 03:56:13PM +0300, Konstantin Khlebnikov wrote:
> This patch adds per-mount-namespace core dump pattern.
>
> Kernel writes coredump in chroot/container where application is
> executed or starts pipe helper in the same chroot according to
> pattern set by sysctl "kernel.core_pattern".

I'm pretty sure it doesn't, and if it did, that would be a security bug.

Coredump helpers have, as far as I know, always been executed in the
root directory of init, not inside the chroot.

Absolute core patterns used to write coredumps into the root directory
of the chroot, and that was a security issue, which I fixed:

https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=378c6520e7d29280f400ef2ceaf155c86f05a71a

> This configuration is
> global and this sysctl couldn't be extended without breaking anything.
>
> This patch adds second sysctl "kernel.core_pattern_ns" which overrides
> global configuration for tasks in current mount namespace.
> Resetting it to empty string reverts core dumps back to global pattern.
>
> New namespace gets a copy of this configuration from parent.
> [...]
> +char *namespace_core_pattern(bool alloc)
> +{
> + struct mnt_namespace *ns = current->nsproxy->mnt_ns;
> +
> + if (!ns->core_pattern && alloc) {
> + char *new = kzalloc(CORENAME_MAX_SIZE, GFP_KERNEL);
> +
> + if (new && cmpxchg(&ns->core_pattern, NULL, new))
> + kfree(new);
> + }
> +
> + return ns->core_pattern;
> +}
> +
> +static char *current_core_pattern(void)
> +{
> + struct mnt_namespace *ns = current->nsproxy->mnt_ns;
> +
> + if (ns->core_pattern && ns->core_pattern[0])
> + return ns->core_pattern;
> +
> + return core_pattern;
> +}



> /* format_corename will inspect the pattern parameter, and output a
> * name into corename, which must have space for at least
> * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
> @@ -187,7 +212,7 @@ static int cn_print_exe_file(struct core_name *cn)
> static int format_corename(struct core_name *cn, struct coredump_params *cprm)
> {
> const struct cred *cred = current_cred();
> - const char *pat_ptr = core_pattern;
> + const char *pat_ptr = current_core_pattern();
> int ispipe = (*pat_ptr == '|');
> int pid_in_pattern = 0;
> int err = 0;

If you don't change more here, the behavior is going to be that
the namespaced core pattern causes coredump helpers to be launched
and absolute-path coredumps to be written relative to init's root
directory and with the privileges of root in the init namespace.

Those semantics seem unintuitive to me. Shouldn't the coredumps
be written relative to some sort of root directory of the user
namespace? (Yes, I realize that no clear semantics for that are
defined.) And shouldn't the coredumps be written with the
privileges of the user namespace on which the coredump pattern
was set?


> diff --git a/fs/mount.h b/fs/mount.h
> index 2c856fc47ae3..894bca887104 100644
> --- a/fs/mount.h
> +++ b/fs/mount.h
> @@ -16,6 +16,7 @@ struct mnt_namespace {
> u64 event;
> unsigned int mounts; /* # of mounts in the namespace */
> unsigned int pending_mounts;
> + char *core_pattern;
> };
>
> struct mnt_pcp {
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 487ba30bb5c6..a8dd58eb10da 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -24,6 +24,7 @@
> #include <linux/magic.h>
> #include <linux/bootmem.h>
> #include <linux/task_work.h>
> +#include <linux/binfmts.h> /* CORENAME_MAX_SIZE */
> #include "pnode.h"
> #include "internal.h"
>
> @@ -2828,6 +2829,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
> ns_free_inum(&ns->ns);
> dec_mnt_namespaces(ns->ucounts);
> put_user_ns(ns->user_ns);
> + kfree(ns->core_pattern);
> kfree(ns);
> }
>
> @@ -2872,6 +2874,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
> new_ns->ucounts = ucounts;
> new_ns->mounts = 0;
> new_ns->pending_mounts = 0;
> + new_ns->core_pattern = NULL;
> return new_ns;
> }
>
> @@ -2899,6 +2902,15 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
> if (IS_ERR(new_ns))
> return new_ns;
>
> + if (ns->core_pattern) {
> + new_ns->core_pattern = kmemdup(ns->core_pattern,
> + CORENAME_MAX_SIZE, GFP_KERNEL);
> + if (!new_ns->core_pattern) {
> + free_mnt_ns(new_ns);
> + return ERR_PTR(-ENOMEM);
> + }
> + }
> +

AFAICS copying it this way means that in the following scenario,
namespace B will still use the core_pattern "foo" while namespace A
is using core_pattern "bar":
- you set the core_pattern of namespace A to "foo"
- namespace A creates a child namespace B
- you set the core_pattern of namespace A to "bar"

Those are pretty unintuitive semantics. It might be better
to not copy the pattern and instead loop up through the
namespaces in current_core_pattern() or so.

Also, wouldn't you have to use an atomic read here or so?

> namespace_lock();
> /* First pass: copy the tree topology */
> copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 1aea594a54db..9e66daf1e236 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -483,6 +483,13 @@ static struct ctl_table kern_table[] = {
> .proc_handler = proc_dostring_coredump,
> },
> {
> + .procname = "core_pattern_ns",
> + .data = NULL,
> + .maxlen = CORENAME_MAX_SIZE,
> + .mode = 0644,
> + .proc_handler = proc_dostring_coredump,
> + },
> + {

Only root can write it? That seems weird if you intend to use it
for containers.

> .procname = "core_pipe_limit",
> .data = &core_pipe_limit,
> .maxlen = sizeof(unsigned int),
> @@ -2408,10 +2415,27 @@ static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
> }
>
> #ifdef CONFIG_COREDUMP
> +extern char *namespace_core_pattern(bool alloc);
> +
> static int proc_dostring_coredump(struct ctl_table *table, int write,
> void __user *buffer, size_t *lenp, loff_t *ppos)
> {
> - int error = proc_dostring(table, write, buffer, lenp, ppos);
> + struct ctl_table tmp_table;
> + char empty[] = "";
> + int error;
> +
> + if (!table->data) {
> + tmp_table = *table;
> + table = &tmp_table;
> + table->data = namespace_core_pattern(write);
> + if (!table->data) {
> + if (write)
> + return -ENOMEM;
> + table->data = empty;
> + }
> + }
> +
> + error = proc_dostring(table, write, buffer, lenp, ppos);
> if (!error)
> validate_coredump_safety();
> return error;
>
> _______________________________________________
> Containers mailing list
> Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
> https://lists.linuxfoundation.org/mailman/listinfo/containers

Attachment: signature.asc
Description: Digital signature