[RFC v1 08/14] bus1: implement peer management context

From: David Herrmann
Date: Wed Oct 26 2016 - 15:24:28 EST


From: Tom Gundersen <teg@xxxxxxx>

A peer context provides access to the bus1 system. A peer itself is not
a routable entity, but rather only a local anchor to serve as gateway to
the bus. To participate on the bus, you need to allocate a peer. This
peer manages all your state on the bus, including all allocated nodes,
owned handles, incoming messages, and more.

A peer is split into 3 sections:
- A static section that is initialized at peer creation and never
changes
- A peer-local section that is only ever accessed by ioctls done by
the peer itself.
- A data section that might be accessed by remote peers when
interacting with this peer.

All peers on the system operate on the same level. There is no context
a peer is linked into. Hence, you can never lock multiple peers at the
same time. Instead, peers provide active-references. Before performing
an operation on a peer, an active reference must be acquired, and held
as long as the operation goes on. When done, the reference is released
again. When a peer is disconnected, no more active references can be
acquired, and any outstanding operation is waited for before the peer
is destroyed.

In addition to active-references, there are 2 locks: A peer-local lock
and a data lock. The peer-local lock is used to synchronize operations
done by the peer itself. It is never acquired by a remote peer. The
data lock protects the data of the peer, which might be modified by
remote peers. The data lock nests underneath the local-lock.
Furthermore, the data-lock critical sections must be kept small and
never block indefinitely. Remote peers might wait for data-locks, hence
they must rely on not being DoSed. The local peer lock, however, is
private to the peer itself. No such restrictions apply. It is mostly
used to give the impression of atomic operations (i.e., making the API
appear consistent and coherent).

This only adds the peer context, the ioctls will be implemented in
follow-up patches.

Signed-off-by: Tom Gundersen <teg@xxxxxxx>
Signed-off-by: David Herrmann <dh.herrmann@xxxxxxxxx>
---
ipc/bus1/Makefile | 2 +
ipc/bus1/main.c | 17 +++++++
ipc/bus1/main.h | 14 ++++++
ipc/bus1/peer.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++
ipc/bus1/peer.h | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
ipc/bus1/util.c | 52 +++++++++++++++++++
ipc/bus1/util.h | 51 +++++++++++++++++++
7 files changed, 427 insertions(+)
create mode 100644 ipc/bus1/peer.c
create mode 100644 ipc/bus1/peer.h
create mode 100644 ipc/bus1/util.c
create mode 100644 ipc/bus1/util.h

diff --git a/ipc/bus1/Makefile b/ipc/bus1/Makefile
index 94d79e0..c689917 100644
--- a/ipc/bus1/Makefile
+++ b/ipc/bus1/Makefile
@@ -1,6 +1,8 @@
bus1-y := \
main.o \
+ peer.o \
user.o \
+ util.o \
util/active.o \
util/flist.o \
util/pool.o \
diff --git a/ipc/bus1/main.c b/ipc/bus1/main.c
index 526347d..51034f3 100644
--- a/ipc/bus1/main.c
+++ b/ipc/bus1/main.c
@@ -15,24 +15,41 @@
#include <linux/miscdevice.h>
#include <linux/module.h>
#include "main.h"
+#include "peer.h"
#include "tests.h"
#include "user.h"

static int bus1_fop_open(struct inode *inode, struct file *file)
{
+ struct bus1_peer *peer;
+
+ peer = bus1_peer_new();
+ if (IS_ERR(peer))
+ return PTR_ERR(peer);
+
+ file->private_data = peer;
return 0;
}

static int bus1_fop_release(struct inode *inode, struct file *file)
{
+ /* tear down the peer that bus1_fop_open() stored in private_data */
+ bus1_peer_free(file->private_data);
return 0;
}

+/* show_fdinfo hook: emit one line carrying the ID of the file's peer. */
+static void bus1_fop_show_fdinfo(struct seq_file *m, struct file *file)
+{
+ const struct bus1_peer *owner = file->private_data;
+ u64 peer_id = owner->id;
+
+ seq_printf(m, KBUILD_MODNAME "-peer:\t%16llx\n", peer_id);
+}
+
const struct file_operations bus1_fops = {
.owner = THIS_MODULE,
.open = bus1_fop_open,
.release = bus1_fop_release,
.llseek = noop_llseek,
+ /* prints the ID of the peer attached to the file */
+ .show_fdinfo = bus1_fop_show_fdinfo,
};

static struct miscdevice bus1_misc = {
diff --git a/ipc/bus1/main.h b/ipc/bus1/main.h
index 76fce66..dd319d9 100644
--- a/ipc/bus1/main.h
+++ b/ipc/bus1/main.h
@@ -49,6 +49,20 @@
* ordered, including unicasts, multicasts, and notifications.
*/

+/**
+ * DOC: Locking
+ *
+ * Most of the bus1 objects form a hierarchy, as such, their locks must be
+ * ordered. Not all orders are explicitly defined (e.g., they might define
+ * orthogonal hierarchies), but this list gives a rough overview:
+ *
+ * bus1_peer.active
+ * bus1_peer.local.lock
+ * bus1_peer.data.lock
+ * bus1_user.lock
+ * bus1_user_lock
+ */
+
struct dentry;
struct file_operations;

diff --git a/ipc/bus1/peer.c b/ipc/bus1/peer.c
new file mode 100644
index 0000000..a6fbca01
--- /dev/null
+++ b/ipc/bus1/peer.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2013-2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/atomic.h>
+#include <linux/cred.h>
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/pid_namespace.h>
+#include <linux/rbtree.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+#include <linux/wait.h>
+#include "main.h"
+#include "peer.h"
+#include "user.h"
+#include "util.h"
+#include "util/active.h"
+
+/**
+ * bus1_peer_new() - allocate new peer
+ *
+ * Allocate a new peer. It is immediately activated and ready for use. It is
+ * not linked into any context. The caller will get exclusive access to the
+ * peer object on success.
+ *
+ * Note that the peer is opened on behalf of 'current'. That is, it pins its
+ * credentials and namespaces.
+ *
+ * Return: Pointer to peer, ERR_PTR on failure.
+ */
+struct bus1_peer *bus1_peer_new(void)
+{
+	static atomic64_t peer_ids = ATOMIC64_INIT(0);
+	const struct cred *cred = current_cred();
+	struct bus1_user *owner;
+	struct bus1_peer *p;
+	/* "peer-" + up to 16 hex digits of a u64 + terminating NUL */
+	char name[22];
+
+	/* pin the user object of the caller before allocating anything */
+	owner = bus1_user_ref_by_uid(cred->uid);
+	if (IS_ERR(owner))
+		return ERR_CAST(owner);
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		bus1_user_unref(owner);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* static section: IDs start at 1, creds and pid-ns are pinned */
+	p->id = atomic64_inc_return(&peer_ids);
+	p->flags = 0;
+	p->cred = get_cred(cred);
+	p->pid_ns = get_pid_ns(task_active_pid_ns(current));
+	p->user = owner;
+	p->debugdir = NULL;
+	init_waitqueue_head(&p->waitq);
+	bus1_active_init(&p->active);
+
+	/* data section */
+	mutex_init(&p->data.lock);
+
+	/* peer-private section */
+	mutex_init(&p->local.lock);
+
+	/* debugfs is optional; a failure here never fails peer creation */
+	if (!IS_ERR_OR_NULL(bus1_debugdir)) {
+		snprintf(name, sizeof(name), "peer-%llx", p->id);
+		p->debugdir = debugfs_create_dir(name, bus1_debugdir);
+
+		if (!p->debugdir)
+			pr_err("cannot create debugfs dir for peer %llx\n",
+			       p->id);
+		else if (!IS_ERR(p->debugdir))
+			bus1_debugfs_create_atomic_x("active", S_IRUGO,
+						     p->debugdir,
+						     &p->active.count);
+	}
+
+	/* from here on active references can be acquired */
+	bus1_active_activate(&p->active);
+	return p;
+}
+
+/*
+ * Deactivate the peer, drain its active references and run the cleanup
+ * step. Returns 0 if bus1_active_cleanup() reported true, -ESHUTDOWN
+ * otherwise (presumably: the cleanup was already done - confirm against
+ * util/active.c).
+ */
+static int bus1_peer_disconnect(struct bus1_peer *peer)
+{
+	bool cleaned;
+
+	bus1_active_deactivate(&peer->active);
+	bus1_active_drain(&peer->active, &peer->waitq);
+
+	/* no release callback and no userdata are passed to the cleanup */
+	cleaned = bus1_active_cleanup(&peer->active, &peer->waitq,
+				      NULL, NULL);
+
+	return cleaned ? 0 : -ESHUTDOWN;
+}
+
+/**
+ * bus1_peer_free() - destroy peer
+ * @peer: peer to destroy, or NULL
+ *
+ * Destroy a peer object that was previously allocated via bus1_peer_new().
+ * This synchronously waits for any outstanding operations on this peer to
+ * finish, then releases all linked resources and deallocates the peer in an
+ * rcu-delayed manner.
+ *
+ * If NULL is passed, this is a no-op.
+ *
+ * Return: NULL is returned.
+ */
+struct bus1_peer *bus1_peer_free(struct bus1_peer *peer)
+{
+	if (peer) {
+		/* cut the peer off; the result is deliberately ignored */
+		bus1_peer_disconnect(peer);
+
+		/* locks: peer-private one first, then the data lock */
+		mutex_destroy(&peer->local.lock);
+		mutex_destroy(&peer->data.lock);
+
+		/* static section: debugfs entry, active-ref, pinned objects */
+		debugfs_remove_recursive(peer->debugdir);
+		bus1_active_deinit(&peer->active);
+		peer->user = bus1_user_unref(peer->user);
+		put_pid_ns(peer->pid_ns);
+		put_cred(peer->cred);
+
+		/* the memory itself goes away after an rcu grace period */
+		kfree_rcu(peer, rcu);
+	}
+
+	return NULL;
+}
diff --git a/ipc/bus1/peer.h b/ipc/bus1/peer.h
new file mode 100644
index 0000000..277fcf8
--- /dev/null
+++ b/ipc/bus1/peer.h
@@ -0,0 +1,146 @@
+#ifndef __BUS1_PEER_H
+#define __BUS1_PEER_H
+
+/*
+ * Copyright (C) 2013-2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+/**
+ * DOC: Peers
+ *
+ * A peer context provides access to the bus1 system. A peer itself is not a
+ * routable entity, but rather only a local anchor to serve as gateway to the
+ * bus. To participate on the bus, you need to allocate a peer. This peer
+ * manages all your state on the bus, including all allocated nodes, owned
+ * handles, incoming messages, and more.
+ *
+ * A peer is split into 3 sections:
+ * - A static section that is initialized at peer creation and never changes
+ * - A peer-local section that is only ever accessed by ioctls done by the
+ * peer itself.
+ * - A data section that might be accessed by remote peers when interacting
+ * with this peer.
+ *
+ * All peers on the system operate on the same level. There is no context a
+ * peer is linked into. Hence, you can never lock multiple peers at the same
+ * time. Instead, peers provide active-references. Before performing an
+ * operation on a peer, an active reference must be acquired, and held as long
+ * as the operation goes on. When done, the reference is released again.
+ * When a peer is disconnected, no more active references can be acquired, and
+ * any outstanding operation is waited for before the peer is destroyed.
+ *
+ * In addition to active-references, there are 2 locks: A peer-local lock and
+ * a data lock. The peer-local lock is used to synchronize operations done by
+ * the peer itself. It is never acquired by a remote peer. The data lock
+ * protects the data of the peer, which might be modified by remote peers. The
+ * data lock nests underneath the local-lock. Furthermore, the data-lock
+ * critical sections must be kept small and never block indefinitely. Remote
+ * peers might wait for data-locks, hence they must rely on not being DoSed.
+ * The local peer lock, however, is private to the peer itself. No such
+ * restrictions apply. It is mostly used to give the impression of atomic
+ * operations (i.e., making the API appear consistent and coherent).
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/rbtree.h>
+#include <linux/wait.h>
+#include "user.h"
+#include "util/active.h"
+
+struct cred;
+struct dentry;
+struct pid_namespace;
+
+/**
+ * struct bus1_peer - peer context
+ * @id: peer ID
+ * @flags: peer flags
+ * @cred: pinned credentials
+ * @pid_ns: pinned pid-namespace
+ * @user: pinned user
+ * @rcu: rcu-delayed kfree of peer
+ * @waitq: peer wide wait queue
+ * @active: active references
+ * @debugdir: debugfs root of this peer, or NULL/ERR_PTR
+ * @data.lock: data lock
+ * @local.lock: local peer runtime lock
+ */
+struct bus1_peer {
+ /* static section: set up once in bus1_peer_new() */
+ u64 id;
+ u64 flags;
+ const struct cred *cred;
+ struct pid_namespace *pid_ns;
+ struct bus1_user *user;
+ struct rcu_head rcu;
+ wait_queue_head_t waitq;
+ struct bus1_active active;
+ struct dentry *debugdir;
+
+ /* data section: may be accessed by remote peers */
+ struct {
+ struct mutex lock;
+ } data;
+
+ /* peer-local section: only accessed by ioctls of the peer itself */
+ struct {
+ struct mutex lock;
+ } local;
+};
+
+struct bus1_peer *bus1_peer_new(void);
+struct bus1_peer *bus1_peer_free(struct bus1_peer *peer);
+
+/**
+ * bus1_peer_acquire() - acquire active reference to peer
+ * @peer: peer to operate on, or NULL
+ *
+ * Acquire a new active reference to the given peer. If the peer was not
+ * activated yet, or if it was already deactivated, this will fail.
+ *
+ * If NULL is passed, this is a no-op.
+ *
+ * Return: Pointer to peer, NULL on failure.
+ */
+static inline struct bus1_peer *bus1_peer_acquire(struct bus1_peer *peer)
+{
+	/* no peer given, or no active reference could be taken */
+	if (!peer || !bus1_active_acquire(&peer->active))
+		return NULL;
+
+	return peer;
+}
+
+/**
+ * bus1_peer_release() - release an active reference
+ * @peer: peer to release, or NULL
+ *
+ * This releases an active reference to a peer, acquired previously via
+ * bus1_peer_acquire().
+ *
+ * If NULL is passed, this is a no-op.
+ *
+ * Return: NULL is returned.
+ */
+static inline struct bus1_peer *bus1_peer_release(struct bus1_peer *peer)
+{
+	if (!peer)
+		return NULL;
+
+	/*
+	 * Only the active reference keeps the peer alive here, so dropping
+	 * it may let a pending peer destruction proceed. However,
+	 * bus1_active_release() drops the reference first and wakes up the
+	 * wait-queue afterwards. The peer is freed in an rcu-delayed
+	 * manner, hence holding the rcu read lock keeps the wait-queue
+	 * (which is embedded in the peer) around until the wake-up is done.
+	 */
+	rcu_read_lock();
+	bus1_active_release(&peer->active, &peer->waitq);
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+#endif /* __BUS1_PEER_H */
diff --git a/ipc/bus1/util.c b/ipc/bus1/util.c
new file mode 100644
index 0000000..8acf798
--- /dev/null
+++ b/ipc/bus1/util.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013-2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include "util.h"
+
+#if defined(CONFIG_DEBUG_FS)
+
+/*
+ * Getter of the read-only hex attribute: read the atomic_t behind @data and
+ * store its current value in @val. Always returns 0.
+ *
+ * atomic_read() yields a signed int. Convert through an unsigned 32-bit
+ * type so that negative counters are zero-extended instead of
+ * sign-extended; with the "%llx" format the file then shows the 32 bits of
+ * the counter (e.g. "ffffffff" rather than "ffffffffffffffff").
+ */
+static int bus1_debugfs_atomic_t_get(void *data, u64 *val)
+{
+	atomic_t *counter = data;
+
+	*val = (u32)atomic_read(counter);
+	return 0;
+}
+
+/* read-only attribute: getter only (setter is NULL), printed via "%llx\n" */
+DEFINE_DEBUGFS_ATTRIBUTE(bus1_debugfs_atomic_x_ro,
+ bus1_debugfs_atomic_t_get,
+ NULL,
+ "%llx\n");
+
+/**
+ * bus1_debugfs_create_atomic_x() - create debugfs file for hex atomic_t
+ * @name: file name to use
+ * @mode: permissions for the file
+ * @parent: parent directory
+ * @value: variable to read from, or write to
+ *
+ * This is almost equivalent to debugfs_create_atomic_t() but prints/reads the
+ * data as hexadecimal value. So far, only read-only attributes are supported.
+ *
+ * Return: Pointer to new dentry, NULL/ERR_PTR if disabled or on failure.
+ */
+struct dentry *bus1_debugfs_create_atomic_x(const char *name,
+					    umode_t mode,
+					    struct dentry *parent,
+					    atomic_t *value)
+{
+	struct dentry *entry;
+
+	/* @value is passed as the file's data, which the getter reads */
+	entry = debugfs_create_file_unsafe(name, mode, parent, value,
+					   &bus1_debugfs_atomic_x_ro);
+	return entry;
+}
+
+#endif /* defined(CONFIG_DEBUG_FS) */
diff --git a/ipc/bus1/util.h b/ipc/bus1/util.h
new file mode 100644
index 0000000..b9f9e8d
--- /dev/null
+++ b/ipc/bus1/util.h
@@ -0,0 +1,51 @@
+#ifndef __BUS1_UTIL_H
+#define __BUS1_UTIL_H
+
+/*
+ * Copyright (C) 2013-2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+/**
+ * DOC: Utilities
+ *
+ * Random utility functions that don't belong to a specific object. Some of
+ * them are copies from internal kernel functions (which lack an export
+ * annotation), some of them are variants of internal kernel functions, and
+ * some of them are our own.
+ */
+
+#include <linux/atomic.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+struct dentry;
+
+#if defined(CONFIG_DEBUG_FS)
+
+/* implemented in util.c, which only builds it with CONFIG_DEBUG_FS set */
+struct dentry *
+bus1_debugfs_create_atomic_x(const char *name,
+ umode_t mode,
+ struct dentry *parent,
+ atomic_t *value);
+
+#else
+
+/* debugfs disabled: nothing is created, callers get ERR_PTR(-ENODEV) */
+static inline struct dentry *
+bus1_debugfs_create_atomic_x(const char *name,
+ umode_t mode,
+ struct dentry *parent,
+ atomic_t *value)
+{
+ return ERR_PTR(-ENODEV);
+}
+
+#endif
+
+#endif /* __BUS1_UTIL_H */
--
2.10.1