Re: [PATCH 5/6] kdbus: pin namespaces on HELLO

From: Sergei Zviagintsev
Date: Fri Jul 03 2015 - 17:44:59 EST


Hi,

On Thu, Jul 02, 2015 at 10:28:33AM +0200, David Herrmann wrote:
> Whenever we send messages to a target connection, all we know about the
> target is the 'struct file' associated with the kdbus connection. Hence,
> we cannot know which namespaces a receiving process will be in when it
> calls KDBUS_CMD_RECV on the message. So far, we pinned all metadata we
> wanna send and translate it on RECV-time, since we then know the exact
> namespaces to translate into.
>
> This has several drawbacks:
> - Depending on the process calling RECV, the behavior is different (as
> multiple processes might be in different namespaces but share the same
> fd). This is unwanted behavior, as described by Eric here:
> http://www.spinics.net/lists/netdev/msg329322.html
> - We need to pin metadata with a message instead of translating it right
> away.
> - We cannot prep a message at SEND time as we don't know the size of the
> translated metadata. Hence, we need to do all that at RECV time.
>
> This patch changes the namespace behavior. Instead of using the namespaces
> at RECV time, we now pin the namespaces at HELLO (i.e., open()). So
> regardless who calls RECV on this file-descriptor, the same namespaces
> will be used.
> This gives us the advantage that we now always know the target namespaces
> for a message. Hence, we can now properly prep a message at SEND time and
> never have to carry any metadata pins around.
>
> Signed-off-by: David Herrmann <dh.herrmann@xxxxxxxxx>
> ---
> ipc/kdbus/bus.c | 2 +-
> ipc/kdbus/connection.c | 8 +++++++-
> ipc/kdbus/connection.h | 6 ++++++
> ipc/kdbus/metadata.c | 54 ++++++++++++++++++++++++++------------------------
> ipc/kdbus/metadata.h | 1 +
> ipc/kdbus/queue.c | 1 +
> 6 files changed, 44 insertions(+), 28 deletions(-)
>
> diff --git a/ipc/kdbus/bus.c b/ipc/kdbus/bus.c
> index 7d2c336..8fffc2f 100644
> --- a/ipc/kdbus/bus.c
> +++ b/ipc/kdbus/bus.c
> @@ -507,7 +507,7 @@ int kdbus_cmd_bus_creator_info(struct kdbus_conn *conn, void __user *argp)
> goto exit;
> }
>
> - ret = kdbus_meta_export(bus->creator_meta, NULL, attach_flags,
> + ret = kdbus_meta_export(bus->creator_meta, NULL, conn, attach_flags,
> slice, hdr_size, &meta_size);
> if (ret < 0)
> goto exit;
> diff --git a/ipc/kdbus/connection.c b/ipc/kdbus/connection.c
> index e5e9c1e..bf9a8a6 100644
> --- a/ipc/kdbus/connection.c
> +++ b/ipc/kdbus/connection.c
> @@ -130,6 +130,9 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, bool privileged,
> atomic_set(&conn->lost_count, 0);
> INIT_DELAYED_WORK(&conn->work, kdbus_reply_list_scan_work);
> conn->cred = get_current_cred();
> + conn->user_ns = get_user_ns(current_user_ns());
> + conn->pid_ns = get_pid_ns(task_active_pid_ns(current));
> + get_fs_root(current->fs, &conn->root_path);
> init_waitqueue_head(&conn->wait);
> kdbus_queue_init(&conn->queue);
> conn->privileged = privileged;
> @@ -271,6 +274,9 @@ static void __kdbus_conn_free(struct kref *kref)
> kdbus_match_db_free(conn->match_db);
> kdbus_pool_free(conn->pool);
> kdbus_ep_unref(conn->ep);
> + path_put(&conn->root_path);
> + put_pid_ns(conn->pid_ns);
> + put_user_ns(conn->user_ns);
> put_cred(conn->cred);
> kfree(conn->description);
> kfree(conn->quota);
> @@ -1792,7 +1798,7 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp)
> goto exit;
> }
>
> - ret = kdbus_meta_export(owner_conn->meta, conn_meta, attach_flags,
> + ret = kdbus_meta_export(owner_conn->meta, conn_meta, conn, attach_flags,
> slice, sizeof(info), &meta_size);
> if (ret < 0)
> goto exit;
> diff --git a/ipc/kdbus/connection.h b/ipc/kdbus/connection.h
> index d1ffe90..226f3ff 100644
> --- a/ipc/kdbus/connection.h
> +++ b/ipc/kdbus/connection.h
> @@ -59,6 +59,9 @@ struct kdbus_kmsg;
> * @pool: The user's buffer to receive messages
> * @user: Owner of the connection
> * @cred: The credentials of the connection at creation time
> + * @user_ns: User namespace at creation time
> + * @pid_ns: Pid namespace at creation time

Pid -> PID ?

> + * @root_path: Root path at creation time
> * @name_count: Number of owned well-known names
> * @request_count: Number of pending requests issued by this
> * connection that are waiting for replies from
> @@ -97,6 +100,9 @@ struct kdbus_conn {
> struct kdbus_pool *pool;
> struct kdbus_user *user;
> const struct cred *cred;
> + struct user_namespace *user_ns;
> + struct pid_namespace *pid_ns;
> + struct path root_path;
> atomic_t name_count;
> atomic_t request_count;
> atomic_t lost_count;
> diff --git a/ipc/kdbus/metadata.c b/ipc/kdbus/metadata.c
> index c36b9cc..79f0e8c 100644
> --- a/ipc/kdbus/metadata.c
> +++ b/ipc/kdbus/metadata.c
> @@ -888,7 +888,8 @@ static int kdbus_meta_push_kvec(struct kvec *kvec,
> }
>
> static void kdbus_meta_export_caps(struct kdbus_meta_caps *out,
> - struct kdbus_meta_proc *mp)
> + struct kdbus_meta_proc *mp,
> + struct user_namespace *user_ns)
> {
> struct user_namespace *iter;
> const struct cred *cred = mp->cred;
> @@ -896,18 +897,18 @@ static void kdbus_meta_export_caps(struct kdbus_meta_caps *out,
> int i;
>
> /*
> - * This translates the effective capabilities of 'cred' into the current
> - * user-namespace. If the current user-namespace is a child-namespace of
> + * This translates the effective capabilities of 'cred' into the given
> + * user-namespace. If the given user-namespace is a child-namespace of
> * the user-namespace of 'cred', the mask can be copied verbatim. If
> * not, the mask is cleared.
> * There's one exception: If 'cred' is the owner of any user-namespace
> - * in the path between the current user-namespace and the user-namespace
> + * in the path between the given user-namespace and the user-namespace
> * of 'cred', then it has all effective capabilities set. This means,
> * the user who created a user-namespace always has all effective
> * capabilities in any child namespaces. Note that this is based on the
> * uid of the namespace creator, not the task hierarchy.
> */
> - for (iter = current_user_ns(); iter; iter = iter->parent) {
> + for (iter = user_ns; iter; iter = iter->parent) {

Is check `iter' for being not NULL is needed here? I mean, `iter' takes
range from `user_ns' (which is cred->user_ns) to &init_user_ns.

> if (iter == cred->user_ns) {
> parent = true;
> break;
> @@ -951,23 +952,22 @@ static void kdbus_meta_export_caps(struct kdbus_meta_caps *out,
> }
>
> /* This is equivalent to from_kuid_munged(), but maps INVALID_UID to itself */
> -static uid_t kdbus_from_kuid_keep(kuid_t uid)
> +static uid_t kdbus_from_kuid_keep(struct user_namespace *ns, kuid_t uid)
> {
> - return uid_valid(uid) ?
> - from_kuid_munged(current_user_ns(), uid) : ((uid_t)-1);
> + return uid_valid(uid) ? from_kuid_munged(ns, uid) : ((uid_t)-1);
> }
>
> /* This is equivalent to from_kgid_munged(), but maps INVALID_GID to itself */
> -static gid_t kdbus_from_kgid_keep(kgid_t gid)
> +static gid_t kdbus_from_kgid_keep(struct user_namespace *ns, kgid_t gid)
> {
> - return gid_valid(gid) ?
> - from_kgid_munged(current_user_ns(), gid) : ((gid_t)-1);
> + return gid_valid(gid) ? from_kgid_munged(ns, gid) : ((gid_t)-1);
> }
>
> /**
> * kdbus_meta_export() - export information from metadata into a slice
> * @mp: Process metadata, or NULL
> * @mc: Connection metadata, or NULL
> + * @conn: Target connection to translate metadata into
> * @mask: Mask of KDBUS_ATTACH_* flags to export
> * @slice: The slice to export to
> * @offset: The offset inside @slice to write to
> @@ -983,18 +983,19 @@ static gid_t kdbus_from_kgid_keep(kgid_t gid)
> * kdbus_meta_export_prepare(); depending on the namespaces in question, it
> * might use up less than that.
> *
> - * All information will be translated using the current namespaces.
> + * All information will be translated using the namespaces of @conn.
> *
> * Return: 0 on success, negative error number otherwise.
> */
> int kdbus_meta_export(struct kdbus_meta_proc *mp,
> struct kdbus_meta_conn *mc,
> + struct kdbus_conn *conn,
> u64 mask,
> struct kdbus_pool_slice *slice,
> off_t offset,
> size_t *real_size)
> {
> - struct user_namespace *user_ns = current_user_ns();
> + struct user_namespace *user_ns = conn->user_ns;
> struct kdbus_item_header item_hdr[13], *hdr;
> char *exe_pathname = NULL;
> struct kdbus_creds creds;
> @@ -1016,23 +1017,23 @@ int kdbus_meta_export(struct kdbus_meta_proc *mp,
> /* process metadata */
>
> if (mp && (mask & KDBUS_ATTACH_CREDS)) {
> - creds.uid = kdbus_from_kuid_keep(mp->uid);
> - creds.euid = kdbus_from_kuid_keep(mp->euid);
> - creds.suid = kdbus_from_kuid_keep(mp->suid);
> - creds.fsuid = kdbus_from_kuid_keep(mp->fsuid);
> - creds.gid = kdbus_from_kgid_keep(mp->gid);
> - creds.egid = kdbus_from_kgid_keep(mp->egid);
> - creds.sgid = kdbus_from_kgid_keep(mp->sgid);
> - creds.fsgid = kdbus_from_kgid_keep(mp->fsgid);
> + creds.uid = kdbus_from_kuid_keep(user_ns, mp->uid);
> + creds.euid = kdbus_from_kuid_keep(user_ns, mp->euid);
> + creds.suid = kdbus_from_kuid_keep(user_ns, mp->suid);
> + creds.fsuid = kdbus_from_kuid_keep(user_ns, mp->fsuid);
> + creds.gid = kdbus_from_kgid_keep(user_ns, mp->gid);
> + creds.egid = kdbus_from_kgid_keep(user_ns, mp->egid);
> + creds.sgid = kdbus_from_kgid_keep(user_ns, mp->sgid);
> + creds.fsgid = kdbus_from_kgid_keep(user_ns, mp->fsgid);
>
> cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++, KDBUS_ITEM_CREDS,
> &creds, sizeof(creds), &size);
> }
>
> if (mp && (mask & KDBUS_ATTACH_PIDS)) {
> - pids.pid = pid_vnr(mp->tgid);
> - pids.tid = pid_vnr(mp->pid);
> - pids.ppid = pid_vnr(mp->ppid);
> + pids.pid = pid_nr_ns(mp->tgid, conn->pid_ns);
> + pids.tid = pid_nr_ns(mp->pid, conn->pid_ns);
> + pids.ppid = pid_nr_ns(mp->ppid, conn->pid_ns);
>
> cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++, KDBUS_ITEM_PIDS,
> &pids, sizeof(pids), &size);
> @@ -1078,7 +1079,8 @@ int kdbus_meta_export(struct kdbus_meta_proc *mp,
> */
>
> get_fs_root(current->fs, &p);
> - if (path_equal(&p, &mp->root_path)) {
> + if (path_equal(&p, &mp->root_path) &&
> + path_equal(&p, &conn->root_path)) {
> exe_page = (void *)__get_free_page(GFP_TEMPORARY);
> if (!exe_page) {
> path_put(&p);
> @@ -1116,7 +1118,7 @@ int kdbus_meta_export(struct kdbus_meta_proc *mp,
> if (mp && (mask & KDBUS_ATTACH_CAPS)) {
> struct kdbus_meta_caps caps = {};
>
> - kdbus_meta_export_caps(&caps, mp);
> + kdbus_meta_export_caps(&caps, mp, user_ns);
> cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
> KDBUS_ITEM_CAPS, &caps,
> sizeof(caps), &size);
> diff --git a/ipc/kdbus/metadata.h b/ipc/kdbus/metadata.h
> index 79b6ac3..2dbbb3d 100644
> --- a/ipc/kdbus/metadata.h
> +++ b/ipc/kdbus/metadata.h
> @@ -46,6 +46,7 @@ int kdbus_meta_export_prepare(struct kdbus_meta_proc *mp,
> u64 *mask, size_t *sz);
> int kdbus_meta_export(struct kdbus_meta_proc *mp,
> struct kdbus_meta_conn *mc,
> + struct kdbus_conn *conn,
> u64 mask,
> struct kdbus_pool_slice *slice,
> off_t offset, size_t *real_size);
> diff --git a/ipc/kdbus/queue.c b/ipc/kdbus/queue.c
> index 25bb3ad..6650b78 100644
> --- a/ipc/kdbus/queue.c
> +++ b/ipc/kdbus/queue.c
> @@ -479,6 +479,7 @@ int kdbus_queue_entry_install(struct kdbus_queue_entry *entry,
>
> ret = kdbus_meta_export(entry->proc_meta,
> entry->conn_meta,
> + conn_dst,
> entry->attach_flags,
> entry->slice,
> entry->meta_offset,
> --
> 2.4.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/