[PATCH 35/38] C/R: checkpoint/restore struct user_namespace

From: Alexey Dobriyan
Date: Fri May 22 2009 - 01:04:36 EST


This patch introduces the first reference loop: user->user_ns->creator, where ->creator is itself a struct user_struct.

The user_ns image references the ->creator image, but only partially
(as a {0, id} reference), because user_namespaces are dumped before
user_structs.

Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>
---
include/linux/kstate-image.h | 12 +++
include/linux/kstate.h | 5 ++
kernel/kstate/cpt-sys.c | 6 ++
kernel/kstate/kstate-context.c | 6 ++
kernel/kstate/kstate-object.c | 4 +
kernel/user.c | 21 +++++-
kernel/user_namespace.c | 146 ++++++++++++++++++++++++++++++++++++++++
7 files changed, 198 insertions(+), 2 deletions(-)
delete mode 100644 kernel/kstate/kstate-uts_ns.c

diff --git a/include/linux/kstate-image.h b/include/linux/kstate-image.h
index 605a2b5..a573833 100644
--- a/include/linux/kstate-image.h
+++ b/include/linux/kstate-image.h
@@ -52,6 +52,7 @@ struct kstate_image_header {
#define KSTATE_OBJ_CRED 12
#define KSTATE_OBJ_GROUP_INFO 13
#define KSTATE_OBJ_USER_STRUCT 14
+#define KSTATE_OBJ_USER_NS 15

struct kstate_object_header {
__u32 obj_type;
@@ -291,6 +292,17 @@ struct kstate_image_group_info {
struct kstate_image_user_struct {
struct kstate_object_header hdr;

+ kstate_ref_t ref_user_ns; /* reference to the image of this user's ->user_ns */
__u32 uid;
} __packed;
+
+struct kstate_image_user_ns {
+ struct kstate_object_header hdr;
+
+ /*
+ * Reference to the image of the user_struct stored in ->creator of
+ * this user namespace.
+ *
+ * KSTATE_REF_UNDEF if user_ns creator user was outside of container,
+ * otherwise partial {0, id} reference: user namespaces are dumped
+ * before user_structs, and restore looks the creator up by id only.
+ */
+ kstate_ref_t ref_creator;
+} __packed;
#endif
diff --git a/include/linux/kstate.h b/include/linux/kstate.h
index dd6b982..f0c8e09 100644
--- a/include/linux/kstate.h
+++ b/include/linux/kstate.h
@@ -35,6 +35,7 @@ enum kstate_context_obj_type {
KSTATE_CTX_NSPROXY,
KSTATE_CTX_PID_NS,
KSTATE_CTX_TASK_STRUCT,
+ KSTATE_CTX_USER_NS,
KSTATE_CTX_USER_STRUCT,
KSTATE_CTX_UTS_NS,
NR_KSTATE_CTX_TYPES
@@ -139,6 +140,10 @@ int kstate_collect_all_user_struct(struct kstate_context *ctx);
int kstate_dump_all_user_struct(struct kstate_context *ctx);
int kstate_restore_user_struct(struct kstate_context *ctx, kstate_ref_t *ref);

+int kstate_collect_all_user_ns(struct kstate_context *ctx);
+int kstate_dump_all_user_ns(struct kstate_context *ctx);
+int kstate_restore_user_ns(struct kstate_context *ctx, kstate_ref_t *ref);
+
#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
extern const __u32 kstate_kernel_arch;
int kstate_arch_check_image_header(struct kstate_image_header *i);
diff --git a/kernel/kstate/cpt-sys.c b/kernel/kstate/cpt-sys.c
index a409577..3df776e 100644
--- a/kernel/kstate/cpt-sys.c
+++ b/kernel/kstate/cpt-sys.c
@@ -98,6 +98,9 @@ static int kstate_collect(struct kstate_context *ctx)
rv = kstate_collect_all_user_struct(ctx);
if (rv < 0)
return rv;
+ rv = kstate_collect_all_user_ns(ctx);
+ if (rv < 0)
+ return rv;
return 0;
}

@@ -151,6 +154,9 @@ static int kstate_dump(struct kstate_context *ctx)
rv = kstate_dump_all_pid_ns(ctx);
if (rv < 0)
return rv;
+ rv = kstate_dump_all_user_ns(ctx);
+ if (rv < 0)
+ return rv;
rv = kstate_dump_all_user_struct(ctx);
if (rv < 0)
return rv;
diff --git a/kernel/kstate/kstate-context.c b/kernel/kstate/kstate-context.c
index 854f971..f8168cc 100644
--- a/kernel/kstate/kstate-context.c
+++ b/kernel/kstate/kstate-context.c
@@ -7,6 +7,7 @@
#include <linux/pid_namespace.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/user_namespace.h>
#include <linux/utsname.h>
#include <net/net_namespace.h>

@@ -90,6 +91,11 @@ void kstate_context_destroy(struct kstate_context *ctx)
list_del(&obj->o_list);
kfree(obj);
}
+ for_each_kstate_object_safe(ctx, obj, tmp, KSTATE_CTX_USER_NS) {
+ put_user_ns((struct user_namespace *)obj->o_obj);
+ list_del(&obj->o_list);
+ kfree(obj);
+ }
for_each_kstate_object_safe(ctx, obj, tmp, KSTATE_CTX_USER_STRUCT) {
free_uid((struct user_struct *)obj->o_obj);
list_del(&obj->o_list);
diff --git a/kernel/kstate/kstate-object.c b/kernel/kstate/kstate-object.c
index 75facda..eb77027 100644
--- a/kernel/kstate/kstate-object.c
+++ b/kernel/kstate/kstate-object.c
@@ -7,6 +7,7 @@
#include <linux/pid_namespace.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/user_namespace.h>
#include <linux/utsname.h>
#include <net/net_namespace.h>

@@ -69,6 +70,9 @@ int kstate_collect_object(struct kstate_context *ctx, void *p, enum kstate_conte
case KSTATE_CTX_TASK_STRUCT:
get_task_struct((struct task_struct *)obj->o_obj);
break;
+ case KSTATE_CTX_USER_NS:
+ get_user_ns((struct user_namespace *)obj->o_obj);
+ break;
case KSTATE_CTX_USER_STRUCT:
get_uid((struct user_struct *)obj->o_obj);
break;
diff --git a/kernel/kstate/kstate-uts_ns.c b/kernel/kstate/kstate-uts_ns.c
deleted file mode 100644
index e69de29..0000000
diff --git a/kernel/user.c b/kernel/user.c
index 9fda1f0..508c05d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -554,6 +554,10 @@ int kstate_collect_all_user_struct(struct kstate_context *ctx)
if (rv < 0)
return rv;
}
+ /*
+ * Don't check refcounts here: user_ns->creator references haven't
+ * been accounted yet, so the check would fire every time
+ * CLONE_NEWUSER is used.
+ */
return 0;
}

@@ -561,12 +565,15 @@ static int dump_user_struct(struct kstate_context *ctx, struct kstate_object *ob
{
struct user_struct *user = obj->o_obj;
struct kstate_image_user_struct *i;
+ struct kstate_object *tmp;
int rv;

i = kstate_prepare_image(KSTATE_OBJ_USER_STRUCT, sizeof(*i));
if (!i)
return -ENOMEM;

+ tmp = find_kstate_obj_by_ptr(ctx, user->user_ns, KSTATE_CTX_USER_NS);
+ i->ref_user_ns = tmp->o_ref;
i->uid = user->uid;

rv = kstate_write_image(ctx, i, sizeof(*i), obj);
@@ -592,14 +599,24 @@ int kstate_restore_user_struct(struct kstate_context *ctx, kstate_ref_t *ref)
{
struct kstate_image_user_struct *i;
struct user_struct *user;
+ struct user_namespace *user_ns;
+ struct kstate_object *tmp;
int rv;

i = kstate_read_image(ctx, ref, KSTATE_OBJ_USER_STRUCT, sizeof(*i));
if (IS_ERR(i))
return PTR_ERR(i);

- /* FIXME */
- user = alloc_uid(&init_user_ns, i->uid);
+ tmp = find_kstate_obj_by_ref(ctx, &i->ref_user_ns, KSTATE_CTX_USER_NS);
+ if (!tmp) {
+ rv = kstate_restore_user_ns(ctx, &i->ref_user_ns);
+ if (rv < 0)
+ goto out_free_image;
+ tmp = find_kstate_obj_by_ref(ctx, &i->ref_user_ns, KSTATE_CTX_USER_NS);
+ }
+ user_ns = tmp->o_obj;
+
+ user = alloc_uid(user_ns, i->uid);
if (!user) {
rv = -ENOMEM;
goto out_free_image;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 076c7c8..04ef11d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (C) 2000-2009 Parallels Holdings, Ltd.
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, version 2 of the
@@ -82,3 +84,147 @@ void free_user_ns(struct kref *kref)
schedule_work(&ns->destroyer);
}
EXPORT_SYMBOL(free_user_ns);
+
+#ifdef CONFIG_CHECKPOINT
+#include <linux/kstate.h>
+#include <linux/kstate-image.h>
+
+/*
+ * Hand @user_ns to kstate_collect_object() as a KSTATE_CTX_USER_NS object,
+ * log the outcome and return it unchanged.
+ */
+static int collect_user_ns(struct kstate_context *ctx, struct user_namespace *user_ns)
+{
+	const int err = kstate_collect_object(ctx, user_ns, KSTATE_CTX_USER_NS);
+
+	pr_debug("collect user_ns %p: rv %d\n", user_ns, err);
+	return err;
+}
+
+/*
+ * Collect the user namespace of every collected user_struct, then verify
+ * that neither the namespaces nor the users are referenced from outside of
+ * the collected set.
+ *
+ * Returns 0 on success, the negative value returned by collect_user_ns()
+ * if collecting fails, or -EINVAL when a reference count does not match.
+ */
+int kstate_collect_all_user_ns(struct kstate_context *ctx)
+{
+	struct kstate_object *obj;
+
+	/* Pass 1: every collected user contributes the namespace it lives in. */
+	for_each_kstate_object(ctx, obj, KSTATE_CTX_USER_STRUCT) {
+		struct user_struct *u = obj->o_obj;
+		int err = collect_user_ns(ctx, u->user_ns);
+
+		if (err < 0)
+			return err;
+	}
+
+	/*
+	 * Pass 2: the namespace kref must be exactly o_count + 1.
+	 * NOTE(review): the extra one is presumably the reference taken when
+	 * the object was collected -- confirm against kstate_collect_object().
+	 */
+	for_each_kstate_object(ctx, obj, KSTATE_CTX_USER_NS) {
+		struct user_namespace *ns = obj->o_obj;
+		unsigned int refs = atomic_read(&ns->kref.refcount);
+
+		if (refs == obj->o_count + 1)
+			continue;
+		pr_err("user_ns %p has external references %lu:%u\n", ns, obj->o_count, refs);
+		return -EINVAL;
+	}
+
+	/*
+	 * Pass 3: a user pins its user_ns, which in turn pins ->creator, so
+	 * user refcounts cannot be verified right after the users were
+	 * collected.  Account one more reference for every creator that is
+	 * part of the collected set; creators that are legitimately outside
+	 * of the container are not found and are left alone.
+	 */
+	for_each_kstate_object(ctx, obj, KSTATE_CTX_USER_NS) {
+		struct user_namespace *ns = obj->o_obj;
+		struct kstate_object *creator_obj;
+
+		creator_obj = find_kstate_obj_by_ptr(ctx, ns->creator, KSTATE_CTX_USER_STRUCT);
+		if (!creator_obj)
+			continue;
+		creator_obj->o_count++;
+	}
+
+	/* Pass 4: with creators accounted, apply the same check to users. */
+	for_each_kstate_object(ctx, obj, KSTATE_CTX_USER_STRUCT) {
+		struct user_struct *u = obj->o_obj;
+		unsigned int refs = atomic_read(&u->__count);
+
+		if (refs == obj->o_count + 1)
+			continue;
+		pr_err("user_struct %p has external references %lu:%u\n", u, obj->o_count, refs);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Write the image of the user namespace held by @obj.
+ *
+ * The image records a reference to the collected object of ->creator, or
+ * KSTATE_REF_UNDEF when the creator is not among the collected
+ * user_structs.
+ *
+ * Returns -ENOMEM if the image cannot be prepared, otherwise the result of
+ * kstate_write_image().
+ */
+static int dump_user_ns(struct kstate_context *ctx, struct kstate_object *obj)
+{
+	struct user_namespace *ns = obj->o_obj;
+	struct kstate_object *creator_obj;
+	struct kstate_image_user_ns *img;
+	int err;
+
+	img = kstate_prepare_image(KSTATE_OBJ_USER_NS, sizeof(*img));
+	if (!img)
+		return -ENOMEM;
+
+	creator_obj = find_kstate_obj_by_ptr(ctx, ns->creator, KSTATE_CTX_USER_STRUCT);
+	if (creator_obj)
+		img->ref_creator = creator_obj->o_ref;
+	else
+		img->ref_creator = KSTATE_REF_UNDEF;
+
+	err = kstate_write_image(ctx, img, sizeof(*img), obj);
+	/* The image buffer is only needed for the write itself. */
+	kfree(img);
+	pr_debug("dump user_ns %p: ref {%llu, %u}, rv %d\n", ns, (unsigned long long)obj->o_ref.pos, obj->o_ref.id, err);
+	return err;
+}
+
+/*
+ * Dump every collected user namespace, stopping at the first failure.
+ *
+ * Returns 0 when all namespaces were dumped, otherwise the negative value
+ * returned by dump_user_ns().
+ */
+int kstate_dump_all_user_ns(struct kstate_context *ctx)
+{
+	struct kstate_object *ns_obj;
+
+	for_each_kstate_object(ctx, ns_obj, KSTATE_CTX_USER_NS) {
+		int err = dump_user_ns(ctx, ns_obj);
+
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Restore one user namespace from the image object at @ref.
+ *
+ * The creator is resolved first: an undefined creator reference maps to
+ * the user of ctx->init_tsk, anything else must name an already restored
+ * user_struct (looked up by id).  Resolving it before allocating means the
+ * lookup failure path has nothing but the image to release; previously the
+ * freshly allocated namespace was leaked on that path.
+ *
+ * The namespace takes its own reference on the creator, matching the
+ * "user_ns pins user_ns->creator" accounting done at collect time; that
+ * reference is dropped again if registering the object fails.
+ *
+ * Returns 0 on success, PTR_ERR() of a failed image read, -EINVAL if the
+ * creator is unknown, -ENOMEM on allocation failure, or the negative value
+ * returned by kstate_restore_object().
+ */
+int kstate_restore_user_ns(struct kstate_context *ctx, kstate_ref_t *ref)
+{
+	struct kstate_image_user_ns *i;
+	struct user_namespace *user_ns;
+	struct user_struct *creator;
+	int n;
+	int rv;
+
+	i = kstate_read_image(ctx, ref, KSTATE_OBJ_USER_NS, sizeof(*i));
+	if (IS_ERR(i))
+		return PTR_ERR(i);
+
+	if (kstate_ref_undefined(&i->ref_creator)) {
+		/* Creator was outside of the container at checkpoint time. */
+		creator = ctx->init_tsk->cred->user;
+	} else {
+		struct kstate_object *tmp;
+
+		tmp = find_kstate_obj_by_id(ctx, &i->ref_creator, KSTATE_CTX_USER_STRUCT);
+		if (!tmp) {
+			rv = -EINVAL;
+			goto out_free_image;
+		}
+		creator = tmp->o_obj;
+	}
+
+	user_ns = kmalloc(sizeof(*user_ns), GFP_KERNEL);
+	if (!user_ns) {
+		rv = -ENOMEM;
+		goto out_free_image;
+	}
+	kref_init(&user_ns->kref);
+	for (n = 0; n < UIDHASH_SZ; ++n)
+		INIT_HLIST_HEAD(user_ns->uidhash_table + n);
+	/* The namespace pins its creator for as long as it lives. */
+	user_ns->creator = get_uid(creator);
+	kfree(i);
+
+	rv = kstate_restore_object(ctx, user_ns, KSTATE_CTX_USER_NS, ref);
+	if (rv < 0) {
+		/* Undo the creator reference before discarding the namespace. */
+		free_uid(user_ns->creator);
+		kfree(user_ns);
+	}
+	pr_debug("restore user_ns %p: ref {%llu, %u}, rv %d\n", user_ns, (unsigned long long)ref->pos, ref->id, rv);
+	return rv;
+
+out_free_image:
+	kfree(i);
+	pr_debug("%s: return %d, ref {%llu, %u}\n", __func__, rv, (unsigned long long)ref->pos, ref->id);
+	return rv;
+}
+#endif
--
1.5.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/