Re: [RFC][PATCH] ipc: Remove IPCMNI
From: Waiman Long
Date: Thu Mar 15 2018 - 13:02:22 EST
On 03/14/2018 08:49 PM, Eric W. Biederman wrote:
> The define IPCMNI was originally the size of a statically sized array in
> the kernel and that has long since been removed. Therefore there is no
> fundamental reason for IPCMNI.
>
> The only remaining use IPCMNI serves is as a convoluted way to format
> the ipc id to userspace. It does not appear that anything except for
> the CHECKPOINT_RESTORE code even cares about this variety of assignment
> and the CHECKPOINT_RESTORE code only cares about this weirdness because
> it has to restore these peculiar ids.
>
> Therefore make the assignment of ipc ids match the description in
> Advanced Programming in the Unix Environment and assign the next id
> until INT_MAX is hit then loop around to the lower ids.
>
> This can be implemented trivially with the current code using idr_alloc_cyclic.
>
> To make it possible to keep checkpoint/restore working I have renamed
> the sysctls from xxx_next_id to xxx_nextid. That is enough change that
> a smart CRIU implementation can see that what is exported has changed,
> and act accordingly. New kernels will be able to restore the old id's.
>
> This code still needs some real world testing to verify my assumptions.
> And some work with the CRIU implementations to actually add the code
> that deals with the new form of id assignment.
>
> Updates: 03f595668017 ("ipc: add sysctl to specify desired next object id")
> Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
> ---
>
> Waiman please take a look at this and run it through some tests etc,
> I am pretty certain something like this patch is all you need to do
> to sort out ipc assignment. No messing with sysctls needed.
>
> include/linux/ipc.h | 2 --
> include/linux/ipc_namespace.h | 1 -
> ipc/ipc_sysctl.c | 6 ++--
> ipc/namespace.c | 11 ++----
> ipc/util.c | 80 ++++++++++---------------------------------
> ipc/util.h | 11 +-----
> 6 files changed, 25 insertions(+), 86 deletions(-)
>
> diff --git a/include/linux/ipc.h b/include/linux/ipc.h
> index 821b2f260992..6cc2df7f7ac9 100644
> --- a/include/linux/ipc.h
> +++ b/include/linux/ipc.h
> @@ -8,8 +8,6 @@
> #include <uapi/linux/ipc.h>
> #include <linux/refcount.h>
>
> -#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */
> -
> /* used by in-kernel data structures */
> struct kern_ipc_perm {
> spinlock_t lock;
> diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
> index b5630c8eb2f3..cab33b6a8236 100644
> --- a/include/linux/ipc_namespace.h
> +++ b/include/linux/ipc_namespace.h
> @@ -15,7 +15,6 @@ struct user_namespace;
>
> struct ipc_ids {
> int in_use;
> - unsigned short seq;
> bool tables_initialized;
> struct rw_semaphore rwsem;
> struct idr ipcs_idr;
> diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
> index 8ad93c29f511..a599963d58bf 100644
> --- a/ipc/ipc_sysctl.c
> +++ b/ipc/ipc_sysctl.c
> @@ -176,7 +176,7 @@ static struct ctl_table ipc_kern_table[] = {
> },
> #ifdef CONFIG_CHECKPOINT_RESTORE
> {
> - .procname = "sem_next_id",
> + .procname = "sem_nextid",
> .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
> .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
> .mode = 0644,
> @@ -185,7 +185,7 @@ static struct ctl_table ipc_kern_table[] = {
> .extra2 = &int_max,
> },
> {
> - .procname = "msg_next_id",
> + .procname = "msg_nextid",
> .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
> .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
> .mode = 0644,
> @@ -194,7 +194,7 @@ static struct ctl_table ipc_kern_table[] = {
> .extra2 = &int_max,
> },
> {
> - .procname = "shm_next_id",
> + .procname = "shm_nextid",
> .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
> .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
> .mode = 0644,
So you are changing the names of existing sysctl parameters. Would it be
better to add a new sysctl to indicate that the rule has changed instead?
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index f59a89966f92..84eaeba9e96c 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -109,20 +109,13 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
> {
> struct kern_ipc_perm *perm;
> int next_id;
> - int total, in_use;
>
> down_write(&ids->rwsem);
> -
> - in_use = ids->in_use;
> -
> - for (total = 0, next_id = 0; total < in_use; next_id++) {
> - perm = idr_find(&ids->ipcs_idr, next_id);
> - if (perm == NULL)
> - continue;
> + next_id = 0;
> + while ((perm = idr_get_next(&ids->ipcs_idr, &next_id))) {
> rcu_read_lock();
> ipc_lock_object(perm);
> free(ns, perm);
> - total++;
> }
> up_write(&ids->rwsem);
> }
> diff --git a/ipc/util.c b/ipc/util.c
> index 4ed5a17dd06f..ce6bf18e54df 100644
> --- a/ipc/util.c
> +++ b/ipc/util.c
> @@ -118,7 +118,6 @@ int ipc_init_ids(struct ipc_ids *ids)
> {
> int err;
> ids->in_use = 0;
> - ids->seq = 0;
> init_rwsem(&ids->rwsem);
> err = rhashtable_init(&ids->key_ht, &ipc_kht_params);
> if (err)
> @@ -192,46 +191,18 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
> return NULL;
> }
>
> -#ifdef CONFIG_CHECKPOINT_RESTORE
> -/*
> - * Specify desired id for next allocated IPC object.
> - */
> -#define ipc_idr_alloc(ids, new) \
> - idr_alloc(&(ids)->ipcs_idr, (new), \
> - (ids)->next_id < 0 ? 0 : ipcid_to_idx((ids)->next_id),\
> - 0, GFP_NOWAIT)
>
> -static inline int ipc_buildid(int id, struct ipc_ids *ids,
> - struct kern_ipc_perm *new)
> +static int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
> {
> - if (ids->next_id < 0) { /* default, behave as !CHECKPOINT_RESTORE */
> - new->seq = ids->seq++;
> - if (ids->seq > IPCID_SEQ_MAX)
> - ids->seq = 0;
> - } else {
> - new->seq = ipcid_to_seqx(ids->next_id);
> +#ifdef CONFIG_CHECKPOINT_RESTORE
> + if (ids->next_id >= 0) {
> + idr_set_cursor(&ids->ipcs_idr, ids->next_id);
> ids->next_id = -1;
> }
> -
> - return SEQ_MULTIPLIER * new->seq + id;
> -}
> -
> -#else
> -#define ipc_idr_alloc(ids, new) \
> - idr_alloc(&(ids)->ipcs_idr, (new), 0, 0, GFP_NOWAIT)
> -
> -static inline int ipc_buildid(int id, struct ipc_ids *ids,
> - struct kern_ipc_perm *new)
> -{
> - new->seq = ids->seq++;
> - if (ids->seq > IPCID_SEQ_MAX)
> - ids->seq = 0;
> -
> - return SEQ_MULTIPLIER * new->seq + id;
> +#endif
> + return idr_alloc_cyclic(&ids->ipcs_idr, (new), 0, 0, GFP_NOWAIT);
> }
>
> -#endif /* CONFIG_CHECKPOINT_RESTORE */
> -
> /**
> * ipc_addid - add an ipc identifier
> * @ids: ipc identifier set
> @@ -251,9 +222,6 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
> kgid_t egid;
> int id, err;
>
> - if (limit > IPCMNI)
> - limit = IPCMNI;
> -
> if (!ids->tables_initialized || ids->in_use >= limit)
> return -ENOSPC;
>
> @@ -290,7 +258,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
> if (id > ids->max_id)
> ids->max_id = id;
>
> - new->id = ipc_buildid(id, ids, new);
> + new->id = id;
>
> return id;
> }
> @@ -430,7 +398,7 @@ static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
> */
> void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
> {
> - int lid = ipcid_to_idx(ipcp->id);
> + int lid = ipcp->id;
>
> idr_remove(&ids->ipcs_idr, lid);
> ipc_kht_remove(ids, ipcp);
> @@ -563,7 +531,7 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
> struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
> {
> struct kern_ipc_perm *out;
> - int lid = ipcid_to_idx(id);
> + int lid = id;
>
> if (unlikely(!ids->tables_initialized))
> return ERR_PTR(-EINVAL);
> @@ -757,30 +725,20 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
> loff_t *new_pos)
> {
> struct kern_ipc_perm *ipc;
> - int total, id;
> -
> - total = 0;
> - for (id = 0; id < pos && total < ids->in_use; id++) {
> - ipc = idr_find(&ids->ipcs_idr, id);
> - if (ipc != NULL)
> - total++;
> - }
> + int id;
I think you need to initialize id to pos here; otherwise it is passed to idr_get_next() uninitialized. Right?
>
> - if (total >= ids->in_use)
> + /* Out of range - return NULL to terminate iteration */
> + if (pos > INT_MAX)
> return NULL;
>
> - for (; pos < IPCMNI; pos++) {
> - ipc = idr_find(&ids->ipcs_idr, pos);
> - if (ipc != NULL) {
> - *new_pos = pos + 1;
> - rcu_read_lock();
> - ipc_lock_object(ipc);
> - return ipc;
> - }
> - }
> + ipc = idr_get_next(&ids->ipcs_idr, &id);
> + if (!ipc)
> + return NULL;
>
> - /* Out of range - return NULL to terminate iteration */
> - return NULL;
> + *new_pos = id + 1;
> + rcu_read_lock();
> + ipc_lock_object(ipc);
> + return ipc;
> }
>
> static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
> diff --git a/ipc/util.h b/ipc/util.h
> index 89b8ec176fc4..de8e27367f0c 100644
> --- a/ipc/util.h
> +++ b/ipc/util.h
> @@ -15,8 +15,6 @@
> #include <linux/err.h>
> #include <linux/ipc_namespace.h>
>
> -#define SEQ_MULTIPLIER (IPCMNI)
> -
> int sem_init(void);
> int msg_init(void);
> void shm_init(void);
> @@ -93,10 +91,6 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
> #define IPC_MSG_IDS 1
> #define IPC_SHM_IDS 2
>
> -#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
> -#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
> -#define IPCID_SEQ_MAX min_t(int, INT_MAX/SEQ_MULTIPLIER, USHRT_MAX)
> -
> /* must be called with ids->rwsem acquired for writing */
> int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
>
> @@ -120,9 +114,6 @@ static inline int ipc_get_maxid(struct ipc_ids *ids)
> if (ids->in_use == 0)
> return -1;
>
> - if (ids->in_use == IPCMNI)
> - return IPCMNI - 1;
> -
> return ids->max_id;
> }
>
> @@ -163,7 +154,7 @@ extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);
>
> static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
> {
> - return uid / SEQ_MULTIPLIER != ipcp->seq;
> + return uid != ipcp->seq;
> }
>
> static inline void ipc_lock_object(struct kern_ipc_perm *perm)
I don't know the history of why the id management of SysV IPC was designed
in such a convoluted way, but the patch does make sense to me.
Cheers,
Longman