[PATCH net-next 1/3] connector/cn_proc: Add hash table for threads

From: Anjali Kulkarni
Date: Fri Oct 11 2024 - 20:46:32 EST


Add a new type PROC_CN_MCAST_NOTIFY to the proc connector API, which allows a
thread to notify the kernel that it is exiting abnormally. The thread can also
send the exit status code it wants returned in the notification.
An exiting thread can send this either when it is about to call pthread_exit()
with a non-zero value or from a signal handler.

Add a new file cn_hash.c which implements a hash table storing the exit
codes of abnormally exiting threads, received via the PROC_CN_MCAST_NOTIFY
message above. The key used for the hash table is the pid of the thread, so
when the thread actually exits, we look up its pid in the hash table and
retrieve the exit code sent by the user. If the exit code in struct
task_struct is 0, we then replace it with the user-supplied non-zero exit code.

cn_hash.c implements the hash table add, delete, lookup operations.
mutex_lock() and mutex_unlock() operations are used to safeguard the
integrity of the hash table while adding or deleting elements.
connector.c has the API calls, called from cn_proc.c, as well as calls
to allocate, initialize and free the hash table.

Add a new flag to the PF_* flags of task_struct: PF_EXIT_NOTIFY. This flag
is set when the user sends the exit code via PROC_CN_MCAST_NOTIFY. While
exiting, this flag is checked and the hash table add or delete calls
are only made if this flag is set.

A refcount field hrefcnt is added in struct cn_hash_dev, to keep track
of number of threads which have added an entry in hash table. Before
freeing the struct cn_hash_dev, this value must be 0.

Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx>
---
drivers/connector/Makefile | 2 +-
drivers/connector/cn_hash.c | 195 ++++++++++++++++++++++++++++++++++
drivers/connector/cn_proc.c | 59 +++++++++-
drivers/connector/connector.c | 83 ++++++++++++++-
include/linux/connector.h | 43 ++++++++
include/linux/sched.h | 2 +-
include/uapi/linux/cn_proc.h | 4 +-
7 files changed, 379 insertions(+), 9 deletions(-)
create mode 100644 drivers/connector/cn_hash.c

diff --git a/drivers/connector/Makefile b/drivers/connector/Makefile
index 1bf67d3df97d..cb1dcdf067ad 100644
--- a/drivers/connector/Makefile
+++ b/drivers/connector/Makefile
@@ -2,4 +2,4 @@
obj-$(CONFIG_CONNECTOR) += cn.o
obj-$(CONFIG_PROC_EVENTS) += cn_proc.o

-cn-y += cn_queue.o connector.o
+cn-y += cn_hash.o cn_queue.o connector.o
diff --git a/drivers/connector/cn_hash.c b/drivers/connector/cn_hash.c
new file mode 100644
index 000000000000..a0211cd99132
--- /dev/null
+++ b/drivers/connector/cn_hash.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Author: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx>
+ *
+ * Copyright (c) 2024 Oracle and/or its affiliates.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/connector.h>
+#include <linux/mutex.h>
+#include <linux/pid_namespace.h>
+
+#include <linux/cn_proc.h>
+
+/*
+ * Allocate a zeroed cn_hash_dev, copy @name into it (truncated to the size
+ * of hdev->name) and initialise its refcount, mutex and hash buckets.
+ *
+ * Returns the new device, or NULL if the allocation fails.
+ */
+struct cn_hash_dev *cn_hash_alloc_dev(const char *name)
+{
+	struct cn_hash_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+
+	if (dev) {
+		hash_init(dev->uexit_pid_htable);
+		mutex_init(&dev->uexit_hash_lock);
+		atomic_set(&dev->hrefcnt, 0);
+		snprintf(dev->name, sizeof(dev->name), "%s", name);
+	}
+
+	return dev;
+}
+
+/*
+ * Tear down a hash device: remove and free every remaining entry, wait
+ * until hrefcnt reaches zero, then destroy the mutex and free @hdev.
+ *
+ * Every entry removed here also drops the count that cn_hash_add_elem()
+ * took for it. Without that, the wait loop below could never terminate
+ * when entries were still present at teardown.
+ */
+void cn_hash_free_dev(struct cn_hash_dev *hdev)
+{
+	struct uexit_pid_hnode *hnode;
+	struct hlist_node *tmp;
+	int bucket;
+
+	pr_debug("%s: Freeing entire hdev %p\n", __func__, hdev);
+
+	mutex_lock(&hdev->uexit_hash_lock);
+	hash_for_each_safe(hdev->uexit_pid_htable, bucket, tmp,
+			   hnode, uexit_pid_hlist) {
+		hash_del(&hnode->uexit_pid_hlist);
+		pr_debug("%s: Freeing node for pid %d\n",
+			 __func__, hnode->pid);
+		kfree(hnode);
+		atomic_dec(&hdev->hrefcnt);
+	}
+	mutex_unlock(&hdev->uexit_hash_lock);
+
+	while (atomic_read(&hdev->hrefcnt)) {
+		pr_info("Waiting for %s to become free: refcnt=%d\n",
+			hdev->name, atomic_read(&hdev->hrefcnt));
+		msleep(1000);
+	}
+
+	/*
+	 * Keep the mutex valid during the wait above: the delete paths that
+	 * drop hrefcnt take it before touching the table.
+	 */
+	mutex_destroy(&hdev->uexit_hash_lock);
+	kfree(hdev);
+}
+
+/*
+ * Allocate a hash node carrying @uexit_code for @pid. The node is not
+ * linked into any table; the caller does that.
+ *
+ * Returns the node, or NULL if the allocation fails.
+ */
+static struct uexit_pid_hnode *cn_hash_alloc_elem(__u32 uexit_code, pid_t pid)
+{
+	struct uexit_pid_hnode *node = kzalloc(sizeof(*node), GFP_KERNEL);
+
+	if (node) {
+		node->pid = pid;
+		node->uexit_code = uexit_code;
+		INIT_HLIST_NODE(&node->uexit_pid_hlist);
+	}
+
+	return node;
+}
+
+/*
+ * Release a hash node with kfree(). cn_hash_add_elem() uses this to discard
+ * a freshly allocated node when the pid is already present in the table.
+ */
+void cn_hash_free_elem(struct uexit_pid_hnode *elem)
+{
+ kfree(elem);
+}
+
+/*
+ * Record @uexit_code for @pid in the hash table of @hdev.
+ *
+ * The node is allocated before the lock is taken. If @pid already has an
+ * entry the new node is discarded and the existing entry is left untouched.
+ * hrefcnt is bumped while the lock is still held, so the count never lags
+ * behind the table contents as seen by other lock holders.
+ *
+ * Returns 0 on success, -ENOMEM if the node cannot be allocated, or
+ * -EEXIST if @pid is already in the table.
+ */
+int cn_hash_add_elem(struct cn_hash_dev *hdev, __u32 uexit_code, pid_t pid)
+{
+	struct uexit_pid_hnode *elem, *hnode;
+
+	elem = cn_hash_alloc_elem(uexit_code, pid);
+	if (!elem) {
+		pr_err("%s: cn_hash_alloc_elem() returned NULL pid %d\n",
+		       __func__, pid);
+		return -ENOMEM;
+	}
+
+	mutex_lock(&hdev->uexit_hash_lock);
+
+	/* Refuse to add a second entry for the same pid. */
+	hash_for_each_possible(hdev->uexit_pid_htable,
+			       hnode, uexit_pid_hlist, pid) {
+		if (hnode->pid == pid) {
+			mutex_unlock(&hdev->uexit_hash_lock);
+			cn_hash_free_elem(elem);
+			pr_debug("%s: pid %d already exists in hash table\n",
+				 __func__, pid);
+			return -EEXIST;
+		}
+	}
+
+	hash_add(hdev->uexit_pid_htable, &elem->uexit_pid_hlist, pid);
+	atomic_inc(&hdev->hrefcnt);
+	mutex_unlock(&hdev->uexit_hash_lock);
+
+	pr_debug("%s: After hash_add of pid %d elem %p hrefcnt %d\n",
+		 __func__, pid, elem, atomic_read(&hdev->hrefcnt));
+	return 0;
+}
+
+/*
+ * Remove the entry for @pid from the hash table of @hdev and free it.
+ *
+ * Returns 0 if an entry was removed, or -EINVAL if @pid was not found.
+ */
+int cn_hash_del_elem(struct cn_hash_dev *hdev, pid_t pid)
+{
+	struct uexit_pid_hnode *cur, *match = NULL;
+
+	mutex_lock(&hdev->uexit_hash_lock);
+	hash_for_each_possible(hdev->uexit_pid_htable,
+			       cur, uexit_pid_hlist, pid) {
+		if (cur->pid != pid)
+			continue;
+
+		/* Unlink and stop at once; the iterator is not advanced again. */
+		match = cur;
+		hash_del(&match->uexit_pid_hlist);
+		break;
+	}
+	mutex_unlock(&hdev->uexit_hash_lock);
+
+	if (!match) {
+		pr_err("%s: pid %d not found in hash table\n",
+		       __func__, pid);
+		return -EINVAL;
+	}
+
+	kfree(match);
+	atomic_dec(&hdev->hrefcnt);
+	pr_debug("%s: After hash_del of pid %d, hrefcnt %d\n",
+		 __func__, pid,
+		 atomic_read(&hdev->hrefcnt));
+	return 0;
+}
+
+/*
+ * Remove the entry for @pid from the hash table of @hdev, free it, and
+ * hand back the exit code it carried.
+ *
+ * Returns the stored exit code, or 0 if @pid was not found.
+ */
+__u32 cn_hash_del_get_exval(struct cn_hash_dev *hdev, pid_t pid)
+{
+	struct uexit_pid_hnode *cur, *match = NULL;
+	__u32 code;
+
+	mutex_lock(&hdev->uexit_hash_lock);
+	hash_for_each_possible(hdev->uexit_pid_htable,
+			       cur, uexit_pid_hlist, pid) {
+		if (cur->pid != pid)
+			continue;
+
+		/* Unlink and stop at once; the iterator is not advanced again. */
+		match = cur;
+		hash_del(&match->uexit_pid_hlist);
+		break;
+	}
+	mutex_unlock(&hdev->uexit_hash_lock);
+
+	if (!match) {
+		pr_err("%s: pid %d not found in hash table\n",
+		       __func__, pid);
+		return 0;
+	}
+
+	/* The node is unlinked, so it can be read and freed without the lock. */
+	code = match->uexit_code;
+	kfree(match);
+	atomic_dec(&hdev->hrefcnt);
+	pr_debug("%s: After hash_del of pid %d, found exit code %u hrefcnt %d\n",
+		 __func__, pid, code,
+		 atomic_read(&hdev->hrefcnt));
+	return code;
+}
+
+/*
+ * Look up the exit code stored for @pid in the hash table of @hdev without
+ * removing the entry.
+ *
+ * Returns the stored exit code, or 0 if @pid was not found. The return
+ * type is unsigned, so a negative errno cannot be used as the "not found"
+ * marker: it would look like a large exit code to the caller. 0 matches
+ * what cn_hash_del_get_exval() returns for a missing pid.
+ */
+__u32 cn_hash_get_exval(struct cn_hash_dev *hdev, pid_t pid)
+{
+	struct uexit_pid_hnode *hnode;
+	__u32 excde = 0;
+	bool found = false;
+
+	mutex_lock(&hdev->uexit_hash_lock);
+	hash_for_each_possible(hdev->uexit_pid_htable,
+			       hnode, uexit_pid_hlist, pid) {
+		if (hnode->pid == pid) {
+			excde = hnode->uexit_code;
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&hdev->uexit_hash_lock);
+
+	if (found)
+		pr_debug("%s: Found exit code %u for pid %d\n",
+			 __func__, excde, pid);
+	else
+		pr_debug("%s: pid %d not found in hash table\n",
+			 __func__, pid);
+
+	return excde;
+}
+
+/*
+ * Report whether the pid hash table of @hdev has no entries, as determined
+ * by hash_empty(). The check is made without taking uexit_hash_lock.
+ */
+bool cn_hash_table_empty(struct cn_hash_dev *hdev)
+{
+ return hash_empty(hdev->uexit_pid_htable);
+}
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 44b19e696176..8c6e002069d9 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -69,6 +69,8 @@ static int cn_filter(struct sock *dsk, struct sk_buff *skb, void *data)
if ((__u32)val == PROC_EVENT_ALL)
return 0;

+ pr_debug("%s: val %lx, what %x\n", __func__, val, what);
+
/*
* Drop packet if we have to report only non-zero exit status
* (PROC_EVENT_NONZERO_EXIT) and exit status is 0
@@ -326,9 +328,16 @@ void proc_exit_connector(struct task_struct *task)
struct proc_event *ev;
struct task_struct *parent;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
+ __u32 uexit_code;
+ int err;

- if (atomic_read(&proc_event_num_listeners) < 1)
+ if (atomic_read(&proc_event_num_listeners) < 1) {
+ if (likely(!(task->flags & PF_EXIT_NOTIFY)))
+ return;
+
+ err = cn_del_elem(task->pid);
return;
+ }

msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
@@ -337,7 +346,26 @@ void proc_exit_connector(struct task_struct *task)
ev->what = PROC_EVENT_EXIT;
ev->event_data.exit.process_pid = task->pid;
ev->event_data.exit.process_tgid = task->tgid;
- ev->event_data.exit.exit_code = task->exit_code;
+ if (unlikely(task->flags & PF_EXIT_NOTIFY)) {
+ task_lock(task);
+ task->flags &= ~PF_EXIT_NOTIFY;
+ task_unlock(task);
+
+ uexit_code = cn_del_get_exval(task->pid);
+ if (uexit_code == 0) {
+ pr_debug("%s: Returning with task's exit code %u\n",
+ __func__, task->exit_code);
+ ev->event_data.exit.exit_code = task->exit_code;
+ } else {
+ ev->event_data.exit.exit_code = uexit_code;
+ pr_debug("%s: Reset PF_EXIT_NOTIFY & retrieved exit code %u from hash table, pid %d\n",
+ __func__, task->pid,
+ ev->event_data.exit.exit_code);
+ }
+ } else {
+ ev->event_data.exit.exit_code = task->exit_code;
+ }
+
ev->event_data.exit.exit_signal = task->exit_signal;

rcu_read_lock();
@@ -413,6 +441,15 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
if (msg->len == sizeof(*pinput)) {
pinput = (struct proc_input *)msg->data;
mc_op = pinput->mcast_op;
+ if (mc_op == PROC_CN_MCAST_NOTIFY) {
+ pr_debug("%s: Received PROC_CN_MCAST_NOTIFY, pid %d\n",
+ __func__, current->pid);
+ task_lock(current);
+ current->flags |= PF_EXIT_NOTIFY;
+ task_unlock(current);
+ err = cn_add_elem(pinput->uexit_code, current->pid);
+ return;
+ }
ev_type = pinput->event_type;
} else if (msg->len == sizeof(mc_op)) {
mc_op = *((enum proc_cn_mcast_op *)msg->data);
@@ -432,6 +469,8 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
sk->sk_user_data = kzalloc(sizeof(struct proc_input),
GFP_KERNEL);
if (sk->sk_user_data == NULL) {
+ pr_err("%s: ENOMEM for sk_user_data, pid %d\n",
+ __func__, current->pid);
err = ENOMEM;
goto out;
}
@@ -442,21 +481,33 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
}
((struct proc_input *)(sk->sk_user_data))->event_type =
ev_type;
+ pr_debug("%s: sk: %p pid: %d event_type: %x\n",
+ __func__, sk, current->pid, ev_type);
((struct proc_input *)(sk->sk_user_data))->mcast_op = mc_op;
}

switch (mc_op) {
case PROC_CN_MCAST_LISTEN:
- if (initial || (prev_mc_op != PROC_CN_MCAST_LISTEN))
+ if (initial || (prev_mc_op != PROC_CN_MCAST_LISTEN)) {
atomic_inc(&proc_event_num_listeners);
+ pr_debug("%s: PROC_CN_MCAST_LISTEN pid %d: Incremented listeners to %d\n",
+ __func__, current->pid,
+ atomic_read(&proc_event_num_listeners));
+ }
break;
case PROC_CN_MCAST_IGNORE:
- if (!initial && (prev_mc_op != PROC_CN_MCAST_IGNORE))
+ if (!initial && (prev_mc_op != PROC_CN_MCAST_IGNORE)) {
atomic_dec(&proc_event_num_listeners);
+ pr_debug("%s: PROC_CN_MCAST_IGNORE pid %d: Decremented listeners to %d\n",
+ __func__, current->pid,
+ atomic_read(&proc_event_num_listeners));
+ }
((struct proc_input *)(sk->sk_user_data))->event_type =
PROC_EVENT_NONE;
break;
default:
+ pr_warn("%s: Invalid value for mc_op %d\n",
+ __func__, mc_op);
err = EINVAL;
break;
}
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 4028e8eeba82..506e3cbedf85 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -271,6 +271,67 @@ static int __maybe_unused cn_proc_show(struct seq_file *m, void *v)
return 0;
}

+/*
+ * Remove @pid from the connector's exit-code table and return the code it
+ * had stored. Returns 0 when the connector is not initialised; otherwise
+ * returns whatever cn_hash_del_get_exval() reports.
+ */
+__u32 cn_del_get_exval(pid_t pid)
+{
+	if (cn_already_initialized)
+		return cn_hash_del_get_exval(cdev.hdev, pid);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cn_del_get_exval);
+
+/*
+ * Remove @pid from the connector's exit-code table. Returns 0 when the
+ * connector is not initialised; otherwise returns the result of
+ * cn_hash_del_elem().
+ */
+int cn_del_elem(pid_t pid)
+{
+	if (cn_already_initialized)
+		return cn_hash_del_elem(cdev.hdev, pid);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cn_del_elem);
+
+/*
+ * Store @uexit_code for @pid in the connector's exit-code table. Returns 0
+ * without storing anything when the connector is not initialised; otherwise
+ * returns the result of cn_hash_add_elem().
+ */
+int cn_add_elem(__u32 uexit_code, pid_t pid)
+{
+	int ret = 0;
+
+	if (cn_already_initialized)
+		ret = cn_hash_add_elem(cdev.hdev, uexit_code, pid);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cn_add_elem);
+
+/*
+ * Look up the exit code stored for @pid in the connector's exit-code table
+ * without removing it. Returns 0 when the connector is not initialised;
+ * otherwise returns whatever cn_hash_get_exval() reports.
+ */
+__u32 cn_get_exval(pid_t pid)
+{
+	if (cn_already_initialized)
+		return cn_hash_get_exval(cdev.hdev, pid);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cn_get_exval);
+
+/*
+ * Report whether the connector's exit-code table is empty. Returns false
+ * when the connector is not initialised; otherwise returns the result of
+ * cn_hash_table_empty().
+ */
+bool cn_table_empty(void)
+{
+	if (cn_already_initialized)
+		return cn_hash_table_empty(cdev.hdev);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(cn_table_empty);
+
static int cn_init(void)
{
struct cn_dev *dev = &cdev;
@@ -283,18 +344,35 @@ static int cn_init(void)
};

dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg);
- if (!dev->nls)
+ if (!dev->nls) {
+ pr_err("%s: netlink_kernel_create failed, connector not initialized\n",
+ __func__);
return -EIO;
+ }

dev->cbdev = cn_queue_alloc_dev("cqueue", dev->nls);
if (!dev->cbdev) {
+ pr_err("%s: Allocation of dev->cbdev failed, connector not initialized\n",
+ __func__);
netlink_kernel_release(dev->nls);
return -EINVAL;
}

+ dev->hdev = cn_hash_alloc_dev("pid hash table");
+ if (!dev->hdev) {
+ pr_err("%s: Allocation of dev->hdev failed, connector not initialized\n",
+ __func__);
+ netlink_kernel_release(dev->nls);
+ cn_queue_free_dev(dev->cbdev);
+ return -ENOMEM;
+ }
+
+ pr_debug("Connector initialized, allocated hdev %p\n", dev->hdev);
+
cn_already_initialized = 1;

- proc_create_single("connector", S_IRUGO, init_net.proc_net, cn_proc_show);
+ proc_create_single("connector", S_IRUGO, init_net.proc_net,
+ cn_proc_show);

return 0;
}
@@ -308,6 +386,7 @@ static void cn_fini(void)
remove_proc_entry("connector", init_net.proc_net);

cn_queue_free_dev(dev->cbdev);
+ cn_hash_free_dev(dev->hdev);
netlink_kernel_release(dev->nls);
}

diff --git a/include/linux/connector.h b/include/linux/connector.h
index 70bc1160f3d8..094e1730a4f6 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -18,6 +18,8 @@
#include <uapi/linux/connector.h>

#define CN_CBQ_NAMELEN 32
+#define HASHT_NAMELEN 32
+#define PID_HASH_TABLE_BITS 10

struct cn_queue_dev {
atomic_t refcnt;
@@ -45,6 +47,19 @@ struct cn_callback_entry {
u32 seq, group;
};

+/*
+ * One hash table entry: an exit code supplied for a thread, looked up by
+ * that thread's pid.
+ */
+struct uexit_pid_hnode {
+ __u32 uexit_code; /* exit code stored for this pid */
+ pid_t pid; /* hash key */
+ struct hlist_node uexit_pid_hlist; /* linkage in uexit_pid_htable */
+};
+
+/*
+ * Hash table of exit codes keyed by pid, together with the lock and
+ * counter that cn_hash.c uses to manage it.
+ */
+struct cn_hash_dev {
+ atomic_t hrefcnt; /* incremented on add, decremented on delete */
+ unsigned char name[HASHT_NAMELEN]; /* label printed in log messages */
+ struct mutex uexit_hash_lock; /* held around table add/delete/lookup */
+ DECLARE_HASHTABLE(uexit_pid_htable, PID_HASH_TABLE_BITS); /* buckets, indexed by hashed pid */
+};
+
struct cn_dev {
struct cb_id id;

@@ -52,6 +67,7 @@ struct cn_dev {
struct sock *nls;

struct cn_queue_dev *cbdev;
+ struct cn_hash_dev *hdev;
};

/**
@@ -137,4 +153,31 @@ void cn_queue_free_dev(struct cn_queue_dev *dev);

int cn_cb_equal(const struct cb_id *, const struct cb_id *);

+/*
+ * Exit-code hash table (cn_hash.c). These operate on an explicit
+ * cn_hash_dev.
+ */
+struct cn_hash_dev *cn_hash_alloc_dev(const char *name);
+void cn_hash_free_dev(struct cn_hash_dev *hdev);
+/* NOTE(review): no definition of cn_hash_find_pid_node() is visible in cn_hash.c - confirm it is needed. */
+struct uexit_pid_hnode *cn_hash_find_pid_node(struct cn_hash_dev *hdev,
+					      pid_t pid);
+void cn_hash_free_elem(struct uexit_pid_hnode *elem);
+int cn_hash_add_elem(struct cn_hash_dev *hdev, __u32 uexit_code, pid_t pid);
+int cn_hash_del_elem(struct cn_hash_dev *hdev, pid_t pid);
+__u32 cn_hash_del_get_exval(struct cn_hash_dev *hdev, pid_t pid);
+__u32 cn_hash_get_exval(struct cn_hash_dev *hdev, pid_t pid);
+bool cn_hash_table_empty(struct cn_hash_dev *hdev);
+
+/*
+ * Wrappers (connector.c) that apply the operations above to the connector
+ * device's own table.
+ */
+int cn_add_elem(__u32 uexit_code, pid_t pid);
+int cn_del_elem(pid_t pid);
+__u32 cn_del_get_exval(pid_t pid);
+__u32 cn_get_exval(pid_t pid);
+bool cn_table_empty(void);
+
#endif /* __CONNECTOR_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e6ee4258169a..a2339ae6208b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1673,7 +1673,7 @@ extern struct pid *cad_pid;
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
#define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
-#define PF__HOLE__00010000 0x00010000
+#define PF_EXIT_NOTIFY 0x00010000 /* This thread has sent an exit value to be sent as a notification to listening processes */
#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */
#define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */
diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h
index 18e3745b86cd..2b12a24e4651 100644
--- a/include/uapi/linux/cn_proc.h
+++ b/include/uapi/linux/cn_proc.h
@@ -27,7 +27,8 @@
*/
enum proc_cn_mcast_op {
PROC_CN_MCAST_LISTEN = 1,
- PROC_CN_MCAST_IGNORE = 2
+ PROC_CN_MCAST_IGNORE = 2,
+ PROC_CN_MCAST_NOTIFY = 3
};

#define PROC_EVENT_ALL (PROC_EVENT_FORK | PROC_EVENT_EXEC | PROC_EVENT_UID | \
@@ -65,6 +66,7 @@ enum proc_cn_event {
struct proc_input {
enum proc_cn_mcast_op mcast_op;
enum proc_cn_event event_type;
+ __u32 uexit_code;
};

static inline enum proc_cn_event valid_event(enum proc_cn_event ev_type)
--
2.46.0