[PATCH 1/2] cgroup: Add generation number with cgroup id

From: Namhyung Kim
Date: Wed Oct 16 2019 - 08:50:31 EST


Current cgroup id is 32-bit and might be recycled while system is
running. To support unique id, add generation number (gen) to catch
recycling and make 64 bit number. This number will be used as kernfs
id and inode number (and file handle).

Also introduced cgroup_idr struct to keep the idr and generation
together. The related functions are little bit modified as well and I
made some change to cgroup_idr_alloc() to use cyclic allocator.

Later 64 bit system can have a simpler implementation with a single 64
bit sequence number and a RB tree. But it'll need to grab a spinlock
during lookup. I'm not entirely sure it's ok, so I left it as is.

Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
include/linux/cgroup-defs.h | 12 ++++++--
kernel/cgroup/cgroup.c | 58 +++++++++++++++++++++++++++----------
2 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 430e219e3aba..e3f2b47c5c7b 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -364,6 +364,9 @@ struct cgroup {
*/
int id;

+ /* generation number in case of recycled id */
+ int gen;
+
/*
* The depth this cgroup is at. The root is at depth zero and each
* step down the hierarchy increments the level. This along with
@@ -491,6 +494,11 @@ struct cgroup {
int ancestor_ids[];
};

+struct cgroup_idr {
+ struct idr idr;
+ int generation;
+};
+
/*
* A cgroup_root represents the root of a cgroup hierarchy, and may be
* associated with a kernfs_root to form an active hierarchy. This is
@@ -521,7 +529,7 @@ struct cgroup_root {
unsigned int flags;

/* IDs for cgroups in this hierarchy */
- struct idr cgroup_idr;
+ struct cgroup_idr cgroup_idr;

/* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
@@ -701,7 +709,7 @@ struct cgroup_subsys {
struct cgroup_root *root;

/* idr for css->id */
- struct idr css_idr;
+ struct cgroup_idr css_idr;

/*
* List of cftypes. Each entry is the first entry of an array
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 8b1c4fd47a7a..44c67d26c1fe 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -300,34 +300,60 @@ bool cgroup_on_dfl(const struct cgroup *cgrp)
return cgrp->root == &cgrp_dfl_root;
}

+static void cgroup_idr_init(struct cgroup_idr *cidr)
+{
+ idr_init(&cidr->idr);
+ cidr->generation = 1; /* for kernfs compatibility */
+}
+
+static void cgroup_idr_destroy(struct cgroup_idr *cidr)
+{
+ idr_destroy(&cidr->idr);
+}
+
+static void *cgroup_idr_find(struct cgroup_idr *cidr, unsigned long id)
+{
+ return idr_find(&cidr->idr, id);
+}
+
/* IDR wrappers which synchronize using cgroup_idr_lock */
-static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
+static int cgroup_idr_alloc(struct cgroup_idr *cidr, void *ptr, bool is_root,
gfp_t gfp_mask)
{
int ret;
+ int cursor;

idr_preload(gfp_mask);
spin_lock_bh(&cgroup_idr_lock);
- ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
+ gfp_mask &= ~__GFP_DIRECT_RECLAIM;
+
+ if (is_root) {
+ ret = idr_alloc(&cidr->idr, ptr, 1, 2, gfp_mask);
+ } else {
+ cursor = idr_get_cursor(&cidr->idr);
+ ret = idr_alloc_cyclic(&cidr->idr, ptr, 2, 0, gfp_mask);
+ if (ret > 0 && ret < cursor)
+ cidr->generation++;
+ }
spin_unlock_bh(&cgroup_idr_lock);
idr_preload_end();
return ret;
}

-static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
+static void *cgroup_idr_replace(struct cgroup_idr *cidr, void *ptr, int id)
{
void *ret;

spin_lock_bh(&cgroup_idr_lock);
- ret = idr_replace(idr, ptr, id);
+ ret = idr_replace(&cidr->idr, ptr, id);
spin_unlock_bh(&cgroup_idr_lock);
return ret;
}

-static void cgroup_idr_remove(struct idr *idr, int id)
+static void cgroup_idr_remove(struct cgroup_idr *cidr, int id)
{
spin_lock_bh(&cgroup_idr_lock);
- idr_remove(idr, id);
+ idr_remove(&cidr->idr, id);
spin_unlock_bh(&cgroup_idr_lock);
}

@@ -1309,7 +1335,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root)
void cgroup_free_root(struct cgroup_root *root)
{
if (root) {
- idr_destroy(&root->cgroup_idr);
+ cgroup_idr_destroy(&root->cgroup_idr);
kfree(root);
}
}
@@ -1976,7 +2002,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
atomic_set(&root->nr_cgrps, 1);
cgrp->root = root;
init_cgroup_housekeeping(cgrp);
- idr_init(&root->cgroup_idr);
+ cgroup_idr_init(&root->cgroup_idr);

root->flags = ctx->flags;
if (ctx->release_agent)
@@ -1997,10 +2023,11 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)

lockdep_assert_held(&cgroup_mutex);

- ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
+ ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, true, GFP_KERNEL);
if (ret < 0)
goto out;
root_cgrp->id = ret;
+ root_cgrp->gen = root->cgroup_idr.generation;
root_cgrp->ancestor_ids[0] = ret;

ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
@@ -5186,7 +5213,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
if (err)
goto err_free_css;

- err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
+ err = cgroup_idr_alloc(&ss->css_idr, NULL, false, GFP_KERNEL);
if (err < 0)
goto err_free_css;
css->id = err;
@@ -5251,11 +5278,12 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
* Temporarily set the pointer to NULL, so idr_find() won't return
* a half-baked cgroup.
*/
- cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
+ cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, false, GFP_KERNEL);
if (cgrp->id < 0) {
ret = -ENOMEM;
goto out_stat_exit;
}
+ cgrp->gen = root->cgroup_idr.generation;

init_cgroup_housekeeping(cgrp);

@@ -5643,7 +5671,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)

mutex_lock(&cgroup_mutex);

- idr_init(&ss->css_idr);
+ cgroup_idr_init(&ss->css_idr);
INIT_LIST_HEAD(&ss->cfts);

/* Create the root cgroup state for this subsystem */
@@ -5663,7 +5691,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
/* allocation can't be done safely during early init */
css->id = 1;
} else {
- css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
+ css->id = cgroup_idr_alloc(&ss->css_idr, css, true, GFP_KERNEL);
BUG_ON(css->id < 0);
}

@@ -5770,7 +5798,7 @@ int __init cgroup_init(void)
struct cgroup_subsys_state *css =
init_css_set.subsys[ss->id];

- css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
+ css->id = cgroup_idr_alloc(&ss->css_idr, css, true,
GFP_KERNEL);
BUG_ON(css->id < 0);
} else {
@@ -6236,7 +6264,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
{
WARN_ON_ONCE(!rcu_read_lock_held());
- return idr_find(&ss->css_idr, id);
+ return cgroup_idr_find(&ss->css_idr, id);
}

/**
--
2.23.0.700.g56cf767bdb-goog