[PATCH] pid_ns: support pidns switching between siblings

From: Yunhui Cui
Date: Wed Oct 11 2023 - 02:55:32 EST


In the container acceleration scenario, when a target process tree
is cloned from a temporary process tree, we want the cloned processes
to be created directly in the target's pid namespace.
An example of the expected usage:

/* switch to target ns first. */
setns(target_ns, CLONE_NEWPID);
if(!fork()) {
/* Child */
...
}
/* switch back */
setns(temp_ns, CLONE_NEWPID);

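However, this is not possible with the existing implementation:
pidns_install() walks up the parent chain of the requested namespace
and only permits the switch when the caller's active pid namespace is
the requested namespace itself or one of its ancestors, so switching to
a sibling pid namespace fails with -EINVAL. Since CAP_SYS_ADMIN is
already checked in pidns_install(), remove this ancestor-walk
restriction so that a task can also switch to a sibling pid namespace.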

Signed-off-by: Yunhui Cui <cuiyunhui@xxxxxxxxxxxxx>
---
kernel/pid_namespace.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 3028b2218aa4..774db1f268f1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -389,7 +389,7 @@ static int pidns_install(struct nsset *nsset, struct ns_common *ns)
{
struct nsproxy *nsproxy = nsset->nsproxy;
struct pid_namespace *active = task_active_pid_ns(current);
- struct pid_namespace *ancestor, *new = to_pid_ns(ns);
+ struct pid_namespace *new = to_pid_ns(ns);

if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
@@ -406,12 +406,6 @@ static int pidns_install(struct nsset *nsset, struct ns_common *ns)
if (new->level < active->level)
return -EINVAL;

- ancestor = new;
- while (ancestor->level > active->level)
- ancestor = ancestor->parent;
- if (ancestor != active)
- return -EINVAL;
-
put_pid_ns(nsproxy->pid_ns_for_children);
nsproxy->pid_ns_for_children = get_pid_ns(new);
return 0;
--
2.20.1