[RFC DRAFT] Adds PUI: process unique identifier

From: jobol
Date: Tue Jan 10 2017 - 17:05:43 EST


From: José Bollo <jobol@xxxxxxxxxxx>

Hi all,

I'd like to get your feedback on the idea
presented in this draft. Should I continue or stop
immediately? Is there already some existing work?
What do you think of the approach taken?

BR - José Bollo

[RFC DRAFT] Adds PUI: process unique identifier

The name 'pui' was chosen -instead of 'puid'-
to avoid confusion with both pid and uid.
It is intended to uniquely identify each
activated task item, according to namespaces.

64 bits seems to be a good size to begin with.
The number of seconds in a year is less than 2^25.
So if a huge machine were able to create 2^31 processes
per second (e.g. 2^7 cores, each creating 2^24 processes
-a nightmare-), then the unique ids would only be exhausted
after 2^64 / 2^31 / 2^25 = 2^8 years.
Far more than what a regular system upgrade needs.

The pui is represented as a hexadecimal value because
converting to and from hexadecimal only needs shifts and
masks, which is much more efficient than decimal conversion.

Signed-off-by: José Bollo <jobol@xxxxxxxxxxx>
---
fs/proc/array.c | 21 +++++
include/linux/pid.h | 5 ++
include/linux/pid_namespace.h | 4 +
include/linux/pui.h | 55 +++++++++++++
include/uapi/asm-generic/socket.h | 2 +
init/Kconfig | 7 ++
kernel/Makefile | 1 +
kernel/pid.c | 12 +++
kernel/pid_namespace.c | 3 +
kernel/pui.c | 168 ++++++++++++++++++++++++++++++++++++++
net/core/sock.c | 20 +++++
11 files changed, 298 insertions(+)
create mode 100644 include/linux/pui.h
create mode 100644 kernel/pui.c

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 51a4213..6350a1e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -153,6 +153,16 @@ static inline int get_task_umask(struct task_struct *tsk)
return umask;
}

+#ifdef CONFIG_PUI
+/* Write @pui to seq file @m in the textual form produced by pui_to_str(). */
+static inline void put_pui(struct seq_file *m, pui_t pui)
+{
+	pui_str_t text;
+
+	pui_to_str(pui, text);
+	seq_puts(m, text);
+}
+#endif
+
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p)
{
@@ -226,6 +236,17 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
for (g = ns->level; g <= pid->level; g++)
seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, pid->numbers[g].ns));
#endif
+#ifdef CONFIG_PUI
+ seq_puts(m, "\nPui:");
+ put_pui(m, pid->numbers[ns->level].pui);
+#ifdef CONFIG_PID_NS
+ seq_puts(m, "\nNSpui:");
+ for (g = ns->level; g <= pid->level; g++) {
+ seq_putc(m, '\t');
+ put_pui(m, pid->numbers[g].pui);
+ }
+#endif
+#endif
seq_putc(m, '\n');
}

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 23705a5..9bb131c 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -2,6 +2,7 @@
#define _LINUX_PID_H

#include <linux/rcupdate.h>
+#include <linux/pui.h>

enum pid_type
{
@@ -52,6 +53,10 @@ struct upid {
int nr;
struct pid_namespace *ns;
struct hlist_node pid_chain;
+#ifdef CONFIG_PUI
+ struct hlist_node pui_chain;
+ pui_t pui;
+#endif
};

struct pid
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 34cce96..cd4844c 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -9,6 +9,7 @@
#include <linux/nsproxy.h>
#include <linux/kref.h>
#include <linux/ns_common.h>
+#include <linux/pui.h>

struct pidmap {
atomic_t nr_free;
@@ -26,6 +27,9 @@ struct pid_namespace {
struct pidmap pidmap[PIDMAP_ENTRIES];
struct rcu_head rcu;
int last_pid;
+#ifdef CONFIG_PUI
+ pui_gen_t pui_generator;
+#endif
unsigned int nr_hashed;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
diff --git a/include/linux/pui.h b/include/linux/pui.h
new file mode 100644
index 0000000..fee67e2
--- /dev/null
+++ b/include/linux/pui.h
@@ -0,0 +1,55 @@
+#ifndef _LINUX_PUI_H
+#define _LINUX_PUI_H
+
+#ifdef CONFIG_PUI
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+
+/* A process unique identifier: 64-bit value, PUI_INVALID means "none". */
+typedef __u64 pui_t;
+/* Text buffer for a pui: up to 16 hexadecimal digits plus the final NUL. */
+typedef char pui_str_t[17];
+/* Counter from which new identifiers are drawn (one per pid namespace). */
+typedef atomic64_t pui_gen_t;
+
+struct pid;
+struct upid;
+struct task_struct;
+struct pid_namespace;
+
+#define PUI_INVALID	0
+/* pui_gen_t is an atomic64_t, so use the 64-bit static initializer. */
+#define PUI_GEN_INIT	ATOMIC64_INIT(0)
+
+/*
+ * look up a PUI in the hash table. Must be called with the tasklist_lock
+ * or rcu_read_lock() held.
+ *
+ * find_pui_ns() finds the pui in the namespace specified
+ * find_vpui() finds the pui by its virtual id, i.e. in the current namespace
+ *
+ * Both return NULL when no matching entry exists.
+ */
+extern struct pid *find_pui_ns(pui_t pui, struct pid_namespace *ns);
+extern struct pid *find_vpui(pui_t pui);
+
+/*
+ * find a task by its PUI
+ *
+ * find_task_by_pui_ns():
+ *      finds a task by its pui in the specified namespace
+ * find_task_by_vpui():
+ *      finds a task by its virtual pui
+ */
+extern struct task_struct *find_task_by_pui_ns(pui_t pui, struct pid_namespace *ns);
+extern struct task_struct *find_task_by_vpui(pui_t pui);
+
+/*
+ * pui_nr_ns() returns the pui of @pid as seen from @ns, or PUI_INVALID when
+ * @pid is NULL or is not visible in @ns.
+ * pui_vnr() does the same for the namespace of the current task.
+ */
+extern pui_t pui_nr_ns(struct pid *pid, struct pid_namespace *ns);
+extern pui_t pui_vnr(struct pid *pid);
+
+/* Reset @generator to its initial state (run-time counterpart of PUI_GEN_INIT). */
+extern void pui_init_generator(pui_gen_t *generator);
+
+/*
+ * pui_make() assigns a fresh pui to @upid from the generator of @upid->ns.
+ * pui_add() / pui_del() insert @upid into / remove it from the pui hash table.
+ */
+extern void pui_make(struct upid *upid);
+extern void pui_add(struct upid *upid);
+extern void pui_del(struct upid *upid);
+
+/*
+ * pui_to_str() writes @pui as lowercase hexadecimal, without leading zeros,
+ * into @str and returns the number of digits written (NUL excluded).
+ * pui_from_str() parses a hexadecimal string; it returns PUI_INVALID for an
+ * empty string, a non-hexadecimal character or a value exceeding 64 bits.
+ */
+extern int pui_to_str(pui_t pui, pui_str_t str);
+extern pui_t pui_from_str(const char *str);
+
+#endif
+
+#endif /* _LINUX_PUI_H */
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 2c748dd..143bcbe 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -94,4 +94,6 @@

#define SCM_TIMESTAMPING_OPT_STATS 54

+#define SO_PEERPUI 55
+
#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/init/Kconfig b/init/Kconfig
index 223b734..ff4dde2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -306,6 +306,12 @@ config USELIB
earlier, you may need to enable this syscall. Current systems
running glibc can safely disable this.

+config PUI
+	bool "Enables Process Unique Identifier"
+	default n
+	help
+	  Provides a 64-bit unique identifier to processes and threads
+
config AUDIT
bool "Auditing support"
depends on NET
@@ -2158,3 +2164,4 @@ config ASN1
functions to call on what tags.

source "kernel/Kconfig.locks"
+
diff --git a/kernel/Makefile b/kernel/Makefile
index 12c679f..ebd1cd8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_USER_NS) += user_namespace.o
obj-$(CONFIG_PID_NS) += pid_namespace.o
+obj-$(CONFIG_PUI) += pui.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_SMP) += stop_machine.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
diff --git a/kernel/pid.c b/kernel/pid.c
index f66162f..24b76fe 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -75,6 +75,9 @@ struct pid_namespace init_pid_ns = {
[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
},
.last_pid = 0,
+#ifdef CONFIG_PUI
+ .pui_generator = PUI_GEN_INIT,
+#endif
.nr_hashed = PIDNS_HASH_ADDING,
.level = 0,
.child_reaper = &init_task,
@@ -267,6 +270,9 @@ void free_pid(struct pid *pid)
struct upid *upid = pid->numbers + i;
struct pid_namespace *ns = upid->ns;
hlist_del_rcu(&upid->pid_chain);
+#ifdef CONFIG_PUI
+ pui_del(upid);
+#endif
switch(--ns->nr_hashed) {
case 2:
case 1:
@@ -318,6 +324,9 @@ struct pid *alloc_pid(struct pid_namespace *ns)

pid->numbers[i].nr = nr;
pid->numbers[i].ns = tmp;
+#ifdef CONFIG_PUI
+ pui_make(&pid->numbers[i]);
+#endif
tmp = tmp->parent;
}

@@ -338,6 +347,9 @@ struct pid *alloc_pid(struct pid_namespace *ns)
for ( ; upid >= pid->numbers; --upid) {
hlist_add_head_rcu(&upid->pid_chain,
&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+#ifdef CONFIG_PUI
+ pui_add(upid);
+#endif
upid->ns->nr_hashed++;
}
spin_unlock_irq(&pidmap_lock);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index df9e8e9..ed17097 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -130,6 +130,9 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->ucounts = ucounts;
ns->nr_hashed = PIDNS_HASH_ADDING;
INIT_WORK(&ns->proc_work, proc_cleanup_work);
+#ifdef CONFIG_PUI
+ pui_init_generator(&ns->pui_generator);
+#endif

set_bit(0, ns->pidmap[0].page);
atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
diff --git a/kernel/pui.c b/kernel/pui.c
new file mode 100644
index 0000000..d9ab6bb
--- /dev/null
+++ b/kernel/pui.c
@@ -0,0 +1,168 @@
+#ifdef CONFIG_PUI
+
+/*
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/rculist.h>
+#include <linux/bootmem.h>
+#include <linux/hash.h>
+#include <linux/init_task.h>
+#include <linux/syscalls.h>
+#include <linux/proc_ns.h>
+#include <linux/proc_fs.h>
+*/
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/rculist.h>
+#include <linux/hash.h>
+#include <linux/pui.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
+
+/*
+ * Bucket index in pui_hash for a (pui, namespace) pair.
+ * Arguments are parenthesized so that any expression can be passed safely.
+ */
+#define pui_hashfn(pui, ns)	\
+	hash_long((unsigned long)(pui) + (unsigned long)(ns), puihash_shift)
+
+/* Global hash table of upids keyed by (pui, namespace), and its log2 size. */
+static struct hlist_head *pui_hash;
+static unsigned int puihash_shift = 4;
+
+/*
+ * Allocate the global PUI hash table and initialise each of its buckets.
+ * puihash_shift is handed to alloc_large_system_hash() by address and is
+ * then used as the log2 of the bucket count.
+ *
+ * NOTE(review): no caller of this function is visible in this patch.
+ */
+void __init puihash_init(void)
+{
+	unsigned int bucket, nr_buckets;
+
+	pui_hash = alloc_large_system_hash("PUI", sizeof(*pui_hash), 0, 18,
+					   HASH_EARLY | HASH_SMALL,
+					   &puihash_shift, NULL,
+					   0, 4096);
+	nr_buckets = 1U << puihash_shift;
+
+	for (bucket = 0; bucket < nr_buckets; bucket++)
+		INIT_HLIST_HEAD(&pui_hash[bucket]);
+}
+
+/*
+ * Walk the hash bucket selected by (@pui, @ns) and return the struct pid
+ * whose upid at level @ns->level carries both values, or NULL if none does.
+ */
+struct pid *find_pui_ns(pui_t pui, struct pid_namespace *ns)
+{
+	struct hlist_head *bucket = &pui_hash[pui_hashfn(pui, ns)];
+	struct upid *entry;
+
+	hlist_for_each_entry_rcu(entry, bucket, pui_chain) {
+		if (entry->pui != pui || entry->ns != ns)
+			continue;
+		return container_of(entry, struct pid, numbers[ns->level]);
+	}
+
+	return NULL;
+}
+
+/*
+ * Resolve (@pui, @ns) to its struct pid, then to the PIDTYPE_PID task of
+ * that pid. Warns through lockdep when rcu_read_lock() is not held.
+ */
+struct task_struct *find_task_by_pui_ns(pui_t pui, struct pid_namespace *ns)
+{
+	struct pid *pid;
+
+	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
+			 "find_task_by_pui_ns() needs rcu_read_lock() protection");
+	pid = find_pui_ns(pui, ns);
+	return pid_task(pid, PIDTYPE_PID);
+}
+
+/* find_pui_ns() applied to the active pid namespace of the current task. */
+struct pid *find_vpui(pui_t pui)
+{
+	struct pid_namespace *ns = task_active_pid_ns(current);
+
+	return find_pui_ns(pui, ns);
+}
+
+/* find_task_by_pui_ns() applied to the active pid namespace of the current task. */
+struct task_struct *find_task_by_vpui(pui_t pui)
+{
+	struct pid_namespace *ns = task_active_pid_ns(current);
+
+	return find_task_by_pui_ns(pui, ns);
+}
+
+/*
+ * Return the pui that @pid carries in namespace @ns.
+ *
+ * PUI_INVALID is returned when @pid is NULL, when @ns is deeper than the
+ * levels recorded in @pid, or when the upid stored at @ns->level belongs to
+ * a different namespace.
+ */
+pui_t pui_nr_ns(struct pid *pid, struct pid_namespace *ns)
+{
+	struct upid *slot;
+
+	if (!pid || ns->level > pid->level)
+		return PUI_INVALID;
+
+	slot = &pid->numbers[ns->level];
+	return slot->ns == ns ? slot->pui : PUI_INVALID;
+}
+
+/* pui_nr_ns() applied to the active pid namespace of the current task. */
+pui_t pui_vnr(struct pid *pid)
+{
+	struct pid_namespace *ns = task_active_pid_ns(current);
+
+	return pui_nr_ns(pid, ns);
+}
+
+/* Reset the counter behind @generator to zero. */
+void pui_init_generator(pui_gen_t *generator)
+{
+	/* pui_gen_t is a typedef of atomic64_t, so no cast is required. */
+	atomic64_set(generator, 0);
+}
+
+/* Unlink @upid from the pui hash chain it is on. */
+void pui_del(struct upid *upid)
+{
+	struct hlist_node *node = &upid->pui_chain;
+
+	hlist_del_rcu(node);
+}
+
+/* Insert @upid at the head of the hash bucket chosen by its pui and namespace. */
+void pui_add(struct upid *upid)
+{
+	struct hlist_head *bucket = &pui_hash[pui_hashfn(upid->pui, upid->ns)];
+
+	hlist_add_head_rcu(&upid->pui_chain, bucket);
+}
+
+/*
+ * Draw the next identifier from @generator.
+ *
+ * The counter is incremented atomically and its new value is returned.
+ * PUI_INVALID is skipped so that it is never handed out; with a zero-based
+ * counter this only matters if the 64-bit counter wraps around.
+ */
+static inline pui_t pui_new(pui_gen_t *generator)
+{
+	pui_t result;
+
+	/*
+	 * @generator already points at the counter. Taking its address, as
+	 * "&generator" would, increments the local pointer variable instead
+	 * of the shared counter and yields non-unique values.
+	 */
+	do {
+		result = atomic64_inc_return(generator);
+	} while (result == PUI_INVALID);
+
+	return result;
+}
+
+/* Give @upid a new pui taken from the generator of the namespace it belongs to. */
+void pui_make(struct upid *upid)
+{
+	struct pid_namespace *ns = upid->ns;
+
+	upid->pui = pui_new(&ns->pui_generator);
+}
+
+/*
+ * Format @pui into @str as lowercase hexadecimal without leading zeros
+ * (the value 0 yields "0") and NUL-terminate it.
+ *
+ * Returns the number of digits written, not counting the NUL.
+ */
+int pui_to_str(pui_t pui, pui_str_t str)
+{
+	static const char digits[] = "0123456789abcdef";
+	pui_t rest;
+	int len = 1;
+	int pos;
+
+	/* Count the digits first so that they can be stored in final order. */
+	for (rest = pui >> 4; rest; rest >>= 4)
+		len++;
+
+	str[len] = 0;
+	for (pos = len - 1; pos >= 0; pos--) {
+		str[pos] = digits[pui & 15];
+		pui >>= 4;
+	}
+	return len;
+}
+
+/*
+ * Parse the NUL-terminated hexadecimal string @str (either letter case).
+ *
+ * Returns the parsed value, or PUI_INVALID when @str is empty, contains a
+ * character that is not a hexadecimal digit, or would not fit in 64 bits.
+ * A string denoting zero therefore also yields PUI_INVALID.
+ */
+pui_t pui_from_str(const char *str)
+{
+	pui_t value = 0;
+	unsigned int digit;
+	char ch;
+
+	if (!*str)
+		return PUI_INVALID;
+
+	for (; (ch = *str) != 0; str++) {
+		if (ch >= '0' && ch <= '9')
+			digit = ch - '0';
+		else if (ch >= 'a' && ch <= 'f')
+			digit = ch - 'a' + 10;
+		else if (ch >= 'A' && ch <= 'F')
+			digit = ch - 'A' + 10;
+		else
+			return PUI_INVALID;
+
+		/* The top nibble is already in use: one more digit overflows. */
+		if (value >> 60)
+			return PUI_INVALID;
+		value = (value << 4) | digit;
+	}
+	return value;
+}
+
+
+#endif
+
diff --git a/net/core/sock.c b/net/core/sock.c
index f560e08..65f5dbb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1271,6 +1271,26 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_incoming_cpu;
break;

+#ifdef CONFIG_PUI
+ case SO_PEERPUI:
+ {
+ pui_str_t str;
+ pui_t pui;
+ int l;
+
+ pui = pui_vnr(sk->sk_peer_pid);
+ if (pui == PUI_INVALID)
+ return -ENOTCONN;
+ l = pui_to_str(pui, str);
+ if (len <= l)
+ return -EINVAL;
+ len = l + 1; /* includes the terminating NUL */
+ if (copy_to_user(optval, str, len))
+ return -EFAULT;
+ goto lenout;
+ }
+#endif
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
--
2.9.3