Re: [RFC PATCH 6/9] livepatch: create per-task consistency model

From: Jiri Slaby
Date: Sat Feb 14 2015 - 06:40:16 EST


On 02/09/2015, 06:31 PM, Josh Poimboeuf wrote:
> Add a basic per-task consistency model. This is the foundation which
> will eventually enable us to patch those ~10% of security patches which
> change function prototypes and/or data semantics.
>
> When a patch is enabled, livepatch enters into a transition state where
> tasks are converging from the old universe to the new universe. If a
> given task isn't using any of the patched functions, it's switched to
> the new universe. Once all the tasks have been converged to the new
> universe, patching is complete.
>
> The same sequence occurs when a patch is disabled, except the tasks
> converge from the new universe to the old universe.
>
> The /sys/kernel/livepatch/<patch>/transition file shows whether a patch
> is in transition. Only a single patch (the topmost patch on the stack)
> can be in transition at a given time. A patch can remain in the
> transition state indefinitely, if any of the tasks are stuck in the
> previous universe.
>
> A transition can be reversed and effectively canceled by writing the
> opposite value to the /sys/kernel/livepatch/<patch>/enabled file while
> the transition is in progress. Then all the tasks will attempt to
> converge back to the original universe.
>
> Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
> ---
> include/linux/livepatch.h | 18 ++-
> include/linux/sched.h | 3 +
> kernel/fork.c | 2 +
> kernel/livepatch/Makefile | 2 +-
> kernel/livepatch/core.c | 71 ++++++----
> kernel/livepatch/patch.c | 34 ++++-
> kernel/livepatch/patch.h | 1 +
> kernel/livepatch/transition.c | 300 ++++++++++++++++++++++++++++++++++++++++++
> kernel/livepatch/transition.h | 16 +++
> kernel/sched/core.c | 2 +
> 10 files changed, 423 insertions(+), 26 deletions(-)
> create mode 100644 kernel/livepatch/transition.c
> create mode 100644 kernel/livepatch/transition.h
>
> diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
> index 0e65b4d..b8c2f15 100644
> --- a/include/linux/livepatch.h
> +++ b/include/linux/livepatch.h
> @@ -40,6 +40,7 @@
> * @old_size: size of the old function
> * @new_size: size of the new function
> * @patched: the func has been added to the klp_ops list
> + * @transition: the func is currently being applied or reverted
> */
> struct klp_func {
> /* external */
> @@ -60,6 +61,7 @@ struct klp_func {
> struct list_head stack_node;
> unsigned long old_size, new_size;
> int patched;
> + int transition;
> };
>
> /**
> @@ -128,6 +130,20 @@ extern int klp_unregister_patch(struct klp_patch *);
> extern int klp_enable_patch(struct klp_patch *);
> extern int klp_disable_patch(struct klp_patch *);
>
> -#endif /* CONFIG_LIVEPATCH */
> +extern int klp_universe_goal;
> +
> +static inline void klp_update_task_universe(struct task_struct *t)
> +{
> + /* corresponding smp_wmb() is in klp_set_universe_goal() */
> + smp_rmb();
> +
> + t->klp_universe = klp_universe_goal;
> +}
> +
> +#else /* !CONFIG_LIVEPATCH */
> +
> +static inline void klp_update_task_universe(struct task_struct *t) {}
> +
> +#endif /* !CONFIG_LIVEPATCH */
>
> #endif /* _LINUX_LIVEPATCH_H_ */
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 8db31ef..a95e59a 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1701,6 +1701,9 @@ struct task_struct {
> #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
> unsigned long task_state_change;
> #endif
> +#ifdef CONFIG_LIVEPATCH
> + int klp_universe;
> +#endif
> };
>
> /* Future-safe accessor for struct task_struct's cpus_allowed. */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 4dc2dda..1dcbebe 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -74,6 +74,7 @@
> #include <linux/uprobes.h>
> #include <linux/aio.h>
> #include <linux/compiler.h>
> +#include <linux/livepatch.h>
>
> #include <asm/pgtable.h>
> #include <asm/pgalloc.h>
> @@ -1538,6 +1539,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
> total_forks++;
> spin_unlock(&current->sighand->siglock);
> syscall_tracepoint_update(p);
> + klp_update_task_universe(p);
> write_unlock_irq(&tasklist_lock);
>
> proc_fork_connector(p);
> diff --git a/kernel/livepatch/Makefile b/kernel/livepatch/Makefile
> index e136dad..2b8bdb1 100644
> --- a/kernel/livepatch/Makefile
> +++ b/kernel/livepatch/Makefile
> @@ -1,3 +1,3 @@
> obj-$(CONFIG_LIVEPATCH) += livepatch.o
>
> -livepatch-objs := core.o patch.o
> +livepatch-objs := core.o patch.o transition.o
> diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
> index 85d4ef7..790dc10 100644
> --- a/kernel/livepatch/core.c
> +++ b/kernel/livepatch/core.c
> @@ -28,14 +28,17 @@
> #include <linux/kallsyms.h>
>
> #include "patch.h"
> +#include "transition.h"
>
> /*
> - * The klp_mutex protects the global lists and state transitions of any
> - * structure reachable from them. References to any structure must be obtained
> - * under mutex protection (except in klp_ftrace_handler(), which uses RCU to
> - * ensure it gets consistent data).
> + * The klp_mutex is a coarse lock which serializes access to klp data. All
> + * accesses to klp-related variables and structures must have mutex protection,
> + * except within the following functions which carefully avoid the need for it:
> + *
> + * - klp_ftrace_handler()
> + * - klp_update_task_universe()
> */
> -static DEFINE_MUTEX(klp_mutex);
> +DEFINE_MUTEX(klp_mutex);
>
> static LIST_HEAD(klp_patches);
>
> @@ -67,7 +70,6 @@ static void klp_find_object_module(struct klp_object *obj)
> mutex_unlock(&module_mutex);
> }
>
> -/* klp_mutex must be held by caller */
> static bool klp_is_patch_registered(struct klp_patch *patch)
> {
> struct klp_patch *mypatch;
> @@ -285,18 +287,17 @@ static int klp_write_object_relocations(struct module *pmod,
>
> static int __klp_disable_patch(struct klp_patch *patch)
> {
> - struct klp_object *obj;
> + if (klp_transition_patch)
> + return -EBUSY;
>
> /* enforce stacking: only the last enabled patch can be disabled */
> if (!list_is_last(&patch->list, &klp_patches) &&
> list_next_entry(patch, list)->enabled)
> return -EBUSY;
>
> - pr_notice("disabling patch '%s'\n", patch->mod->name);
> -
> - for (obj = patch->objs; obj->funcs; obj++)
> - if (obj->patched)
> - klp_unpatch_object(obj);
> + klp_init_transition(patch, KLP_UNIVERSE_NEW);
> + klp_start_transition(KLP_UNIVERSE_OLD);
> + klp_try_complete_transition();
>
> patch->enabled = 0;
>
> @@ -340,6 +341,9 @@ static int __klp_enable_patch(struct klp_patch *patch)
> struct klp_object *obj;
> int ret;
>
> + if (klp_transition_patch)
> + return -EBUSY;
> +
> if (WARN_ON(patch->enabled))
> return -EINVAL;
>
> @@ -351,7 +355,7 @@ static int __klp_enable_patch(struct klp_patch *patch)
> pr_notice_once("tainting kernel with TAINT_LIVEPATCH\n");
> add_taint(TAINT_LIVEPATCH, LOCKDEP_STILL_OK);
>
> - pr_notice("enabling patch '%s'\n", patch->mod->name);
> + klp_init_transition(patch, KLP_UNIVERSE_OLD);
>
> for (obj = patch->objs; obj->funcs; obj++) {
> klp_find_object_module(obj);
> @@ -360,17 +364,24 @@ static int __klp_enable_patch(struct klp_patch *patch)
> continue;
>
> ret = klp_patch_object(obj);
> - if (ret)
> - goto unregister;
> + if (ret) {
> + pr_warn("failed to enable patch '%s'\n",
> + patch->mod->name);
> +
> + klp_unpatch_objects(patch);
> + klp_complete_transition();
> +
> + return ret;
> + }
> }
>
> + klp_start_transition(KLP_UNIVERSE_NEW);
> +
> + klp_try_complete_transition();
> +
> patch->enabled = 1;
>
> return 0;
> -
> -unregister:
> - WARN_ON(__klp_disable_patch(patch));
> - return ret;
> }
>
> /**
> @@ -407,6 +418,7 @@ EXPORT_SYMBOL_GPL(klp_enable_patch);
> * /sys/kernel/livepatch
> * /sys/kernel/livepatch/<patch>
> * /sys/kernel/livepatch/<patch>/enabled
> + * /sys/kernel/livepatch/<patch>/transition
> * /sys/kernel/livepatch/<patch>/<object>
> * /sys/kernel/livepatch/<patch>/<object>/<func>
> */
> @@ -435,7 +447,9 @@ static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
> goto err;
> }
>
> - if (val) {
> + if (klp_transition_patch == patch) {
> + klp_reverse_transition();
> + } else if (val) {
> ret = __klp_enable_patch(patch);
> if (ret)
> goto err;
> @@ -463,9 +477,21 @@ static ssize_t enabled_show(struct kobject *kobj,
> return snprintf(buf, PAGE_SIZE-1, "%d\n", patch->enabled);
> }
>
> +static ssize_t transition_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + struct klp_patch *patch;
> +
> + patch = container_of(kobj, struct klp_patch, kobj);
> + return snprintf(buf, PAGE_SIZE-1, "%d\n",
> + klp_transition_patch == patch);
> +}
> +
> static struct kobj_attribute enabled_kobj_attr = __ATTR_RW(enabled);
> +static struct kobj_attribute transition_kobj_attr = __ATTR_RO(transition);
> static struct attribute *klp_patch_attrs[] = {
> &enabled_kobj_attr.attr,
> + &transition_kobj_attr.attr,
> NULL
> };
>
> @@ -543,6 +569,7 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
> {
> INIT_LIST_HEAD(&func->stack_node);
> func->patched = 0;
> + func->transition = 0;
>
> return kobject_init_and_add(&func->kobj, &klp_ktype_func,
> obj->kobj, func->old_name);
> @@ -725,7 +752,7 @@ static void klp_module_notify_coming(struct klp_patch *patch,
> if (ret)
> goto err;
>
> - if (!patch->enabled)
> + if (!patch->enabled && klp_transition_patch != patch)
> return;
>
> pr_notice("applying patch '%s' to loading module '%s'\n",
> @@ -746,7 +773,7 @@ static void klp_module_notify_going(struct klp_patch *patch,
> struct module *pmod = patch->mod;
> struct module *mod = obj->mod;
>
> - if (!patch->enabled)
> + if (!patch->enabled && klp_transition_patch != patch)
> goto free;
>
> pr_notice("reverting patch '%s' on unloading module '%s'\n",
> diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
> index 281fbca..f12256b 100644
> --- a/kernel/livepatch/patch.c
> +++ b/kernel/livepatch/patch.c
> @@ -24,6 +24,7 @@
> #include <linux/slab.h>
>
> #include "patch.h"
> +#include "transition.h"
>
> static LIST_HEAD(klp_ops);
>
> @@ -38,14 +39,34 @@ static void notrace klp_ftrace_handler(unsigned long ip,
> ops = container_of(fops, struct klp_ops, fops);
>
> rcu_read_lock();
> +
> func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
> stack_node);
> - rcu_read_unlock();
>
> if (WARN_ON_ONCE(!func))
> - return;
> + goto unlock;
> +
> + if (unlikely(func->transition)) {
> + /* corresponding smp_wmb() is in klp_init_transition() */
> + smp_rmb();
> +
> + if (current->klp_universe == KLP_UNIVERSE_OLD) {
> + /*
> + * Use the previously patched version of the function.
> + * If no previous patches exist, use the original
> + * function.
> + */
> + func = list_entry_rcu(func->stack_node.next,
> + struct klp_func, stack_node);
> +
> + if (&func->stack_node == &ops->func_stack)
> + goto unlock;
> + }
> + }
>
> klp_arch_set_pc(regs, (unsigned long)func->new_func);
> +unlock:
> + rcu_read_unlock();
> }
>
> struct klp_ops *klp_find_ops(unsigned long old_addr)
> @@ -174,3 +195,12 @@ int klp_patch_object(struct klp_object *obj)
>
> return 0;
> }
> +
> +void klp_unpatch_objects(struct klp_patch *patch)
> +{
> + struct klp_object *obj;
> +
> + for (obj = patch->objs; obj->funcs; obj++)
> + if (obj->patched)
> + klp_unpatch_object(obj);
> +}
> diff --git a/kernel/livepatch/patch.h b/kernel/livepatch/patch.h
> index bb34bd3..1648259 100644
> --- a/kernel/livepatch/patch.h
> +++ b/kernel/livepatch/patch.h
> @@ -23,3 +23,4 @@ struct klp_ops *klp_find_ops(unsigned long old_addr);
>
> extern int klp_patch_object(struct klp_object *obj);
> extern void klp_unpatch_object(struct klp_object *obj);
> +extern void klp_unpatch_objects(struct klp_patch *patch);
> diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
> new file mode 100644
> index 0000000..2630296
> --- /dev/null
> +++ b/kernel/livepatch/transition.c
> @@ -0,0 +1,300 @@
> +/*
> + * transition.c - Kernel Live Patching transition functions
> + *
> + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/cpu.h>
> +#include <asm/stacktrace.h>
> +#include "../sched/sched.h"
> +
> +#include "patch.h"
> +#include "transition.h"
> +
> +static void klp_transition_work_fn(struct work_struct *);
> +static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn);
> +
> +struct klp_patch *klp_transition_patch;
> +
> +int klp_universe_goal = KLP_UNIVERSE_UNDEFINED;
> +
> +static void klp_set_universe_goal(int universe)
> +{
> + klp_universe_goal = universe;
> +
> + /* corresponding smp_rmb() is in klp_update_task_universe() */
> + smp_wmb();
> +}
> +
> +/*
> + * The transition to the universe goal is complete. Clean up the data
> + * structures.
> + */
> +void klp_complete_transition(void)
> +{
> + struct klp_object *obj;
> + struct klp_func *func;
> +
> + for (obj = klp_transition_patch->objs; obj->funcs; obj++)
> + for (func = obj->funcs; func->old_name; func++)
> + func->transition = 0;
> +
> + klp_transition_patch = NULL;
> +}
> +
> +static int klp_stacktrace_address_verify_func(struct klp_func *func,
> + unsigned long address)
> +{
> + unsigned long func_addr, func_size;
> +
> + if (klp_universe_goal == KLP_UNIVERSE_OLD) {
> + /* check the to-be-unpatched function (the func itself) */
> + func_addr = (unsigned long)func->new_func;
> + func_size = func->new_size;
> + } else {
> + /* check the to-be-patched function (previous func) */
> + struct klp_ops *ops;
> +
> + ops = klp_find_ops(func->old_addr);
> +
> + if (list_is_singular(&ops->func_stack)) {
> + /* original function */
> + func_addr = func->old_addr;
> + func_size = func->old_size;
> + } else {
> + /* previously patched function */
> + struct klp_func *prev;
> +
> + prev = list_next_entry(func, stack_node);
> + func_addr = (unsigned long)prev->new_func;
> + func_size = prev->new_size;
> + }
> + }
> +
> + if (address >= func_addr && address < func_addr + func_size)
> + return -1;
> +
> + return 0;
> +}
> +
> +/*
> + * Determine whether the given return address on the stack is within a
> + * to-be-patched or to-be-unpatched function.
> + */
> +static void klp_stacktrace_address_verify(void *data, unsigned long address,
> + int reliable)
> +{
> + struct klp_object *obj;
> + struct klp_func *func;
> + int *ret = data;
> +
> + if (*ret)
> + return;
> +
> + for (obj = klp_transition_patch->objs; obj->funcs; obj++) {
> + if (!obj->patched)
> + continue;
> + for (func = obj->funcs; func->old_name; func++) {
> + if (klp_stacktrace_address_verify_func(func, address)) {
> + *ret = -1;
> + return;
> + }
> + }
> + }
> +}
> +
> +static int klp_stacktrace_stack(void *data, char *name)
> +{
> + return 0;
> +}
> +
> +static const struct stacktrace_ops klp_stacktrace_ops = {
> + .address = klp_stacktrace_address_verify,
> + .stack = klp_stacktrace_stack,
> + .walk_stack = print_context_stack_bp,
> +};
> +
> +/*
> + * Try to safely transition a task to the universe goal. If the task is
> + * currently running or is sleeping on a to-be-patched or to-be-unpatched
> + * function, return false.
> + */
> +static bool klp_transition_task(struct task_struct *t)
> +{
> + struct rq *rq;
> + unsigned long flags;
> + int ret;
> + bool success = false;
> +
> + if (t->klp_universe == klp_universe_goal)
> + return true;
> +
> + rq = task_rq_lock(t, &flags);
> +
> + if (task_running(rq, t) && t != current) {
> + pr_debug("%s: pid %d (%s) is running\n", __func__, t->pid,
> + t->comm);
> + goto done;
> + }
> +
> + ret = 0;
> + dump_trace(t, NULL, NULL, 0, &klp_stacktrace_ops, &ret);
> + if (ret) {
> + pr_debug("%s: pid %d (%s) is sleeping on a patched function\n",
> + __func__, t->pid, t->comm);
> + goto done;
> + }
> +
> + klp_update_task_universe(t);
> +
> + success = true;
> +done:
> + task_rq_unlock(rq, t, &flags);
> + return success;
> +}
> +
> +/*
> + * Try to transition all tasks to the universe goal. If any tasks are still
> + * stuck in the original universe, schedule a retry.
> + */
> +void klp_try_complete_transition(void)
> +{
> + unsigned int cpu;
> + struct task_struct *g, *t;
> + bool complete = true;
> +
> + /* try to transition all normal tasks */
> + read_lock(&tasklist_lock);
> + for_each_process_thread(g, t)
> + if (!klp_transition_task(t))
> + complete = false;
> + read_unlock(&tasklist_lock);
> +
> + /* try to transition the idle "swapper" tasks */
> + get_online_cpus();
> + for_each_online_cpu(cpu)
> + if (!klp_transition_task(idle_task(cpu)))
> + complete = false;
> + put_online_cpus();
> +
> + /* if not complete, try again later */
> + if (!complete) {
> + schedule_delayed_work(&klp_transition_work,
> + round_jiffies_relative(HZ));
> + return;
> + }
> +
> + /* success! unpatch obsolete functions and do some cleanup */
> +
> + if (klp_universe_goal == KLP_UNIVERSE_OLD) {
> + klp_unpatch_objects(klp_transition_patch);
> +
> + /* prevent ftrace handler from reading old func->transition */
> + synchronize_rcu();
> + }
> +
> + pr_notice("'%s': %s complete\n", klp_transition_patch->mod->name,
> + klp_universe_goal == KLP_UNIVERSE_NEW ? "patching" :
> + "unpatching");
> +
> + klp_complete_transition();
> +}
> +
> +static void klp_transition_work_fn(struct work_struct *work)
> +{
> + mutex_lock(&klp_mutex);
> +
> + if (klp_transition_patch)
> + klp_try_complete_transition();
> +
> + mutex_unlock(&klp_mutex);
> +}
> +
> +/*
> + * Start the transition to the specified universe so tasks can begin switching
> + * to it.
> + */
> +void klp_start_transition(int universe)
> +{
> + if (WARN_ON(klp_universe_goal == universe))
> + return;
> +
> + pr_notice("'%s': %s...\n", klp_transition_patch->mod->name,
> + universe == KLP_UNIVERSE_NEW ? "patching" : "unpatching");
> +
> + klp_set_universe_goal(universe);
> +}
> +
> +/*
> + * Can be called in the middle of an existing transition to reverse the
> + * direction of the universe goal. This can be done to effectively cancel an
> + * existing enable or disable operation if there are any tasks which are stuck
> + * in the original universe.
> + */
> +void klp_reverse_transition(void)
> +{
> + struct klp_patch *patch = klp_transition_patch;
> +
> + klp_start_transition(!klp_universe_goal);
> + klp_try_complete_transition();
> +
> + patch->enabled = !patch->enabled;
> +}
> +
> +/*
> + * Reset the universe goal and all tasks to the starting universe, and set all
> + * func->transition's to 1 to prepare for patching.
> + */
> +void klp_init_transition(struct klp_patch *patch, int universe)
> +{
> + struct task_struct *g, *t;
> + unsigned int cpu;
> + struct klp_object *obj;
> + struct klp_func *func;
> +
> + klp_transition_patch = patch;
> +
> + /*
> + * If the previous transition was in the opposite direction, we may
> + * already be in the requested initial universe.
> + */
> + if (klp_universe_goal == universe)
> + goto init_funcs;
> +
> + klp_set_universe_goal(universe);
> +
> + /* init all normal task universes */
> + read_lock(&tasklist_lock);
> + for_each_process_thread(g, t)
> + klp_update_task_universe(t);
> + read_unlock(&tasklist_lock);
> +
> + /* init all idle "swapper" task universes */
> + get_online_cpus();
> + for_each_online_cpu(cpu)
> + klp_update_task_universe(idle_task(cpu));
> + put_online_cpus();
> +
> +init_funcs:
> + /* corresponding smp_rmb() is in klp_ftrace_handler() */
> + smp_wmb();
> +
> + for (obj = patch->objs; obj->funcs; obj++)
> + for (func = obj->funcs; func->old_name; func++)
> + func->transition = 1;

So I finally got around to reviewing this one. I have only two concerns:
1) It removes the user's ability to use 'no consistency model'. But you
don't need to worry about this; I plan to implement it as soon as you
send v2 of these patches.

2) How is the 'transition = 1' store above guaranteed to reach the other
CPUs before you start registering the ftrace handlers? AFAICS, a handler
can already be invoked before klp_start_transition(), and at that point
the CPU running it need not have seen the update yet.

thanks,
--
js
suse labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/