Re: [PATCH 2/2] memcg: make move_task_charge check clearer

From: Daisuke Nishimura
Date: Thu Jun 03 2010 - 02:43:54 EST


On Thu, 3 Jun 2010 11:51:19 +0900, KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> wrote:
> From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
>
> Now, to check whether a memcg is under task-account moving, we do css_tryget()
> against mc.to and mc.from, but this just complicates things. This patch
> makes the check simpler.
>
> This patch adds a spinlock to move_charge_struct and guards modification
> of mc.to and mc.from with it. With this, we don't have to reason about
> complicated races around this non-critical path.
>
> Changelog:
> - removed disable/enable irq.
>
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
> ---
> mm/memcontrol.c | 48 ++++++++++++++++++++++++++++--------------------
> 1 file changed, 28 insertions(+), 20 deletions(-)
>
> Index: mmotm-2.6.34-May21/mm/memcontrol.c
> ===================================================================
> --- mmotm-2.6.34-May21.orig/mm/memcontrol.c
> +++ mmotm-2.6.34-May21/mm/memcontrol.c
> @@ -268,6 +268,7 @@ enum move_type {
>
> /* "mc" and its members are protected by cgroup_mutex */
> static struct move_charge_struct {
> + spinlock_t lock; /* for from, to, moving_task */
> struct mem_cgroup *from;
> struct mem_cgroup *to;
> unsigned long precharge;
> @@ -276,6 +277,7 @@ static struct move_charge_struct {
> struct task_struct *moving_task; /* a task moving charges */
> wait_queue_head_t waitq; /* a waitq for other context */
> } mc = {
> + .lock = __SPIN_LOCK_UNLOCKED(mc.lock),
> .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
> };
>
> @@ -1076,26 +1078,25 @@ static unsigned int get_swappiness(struc
>
> static bool mem_cgroup_under_move(struct mem_cgroup *mem)
> {
> - struct mem_cgroup *from = mc.from;
> - struct mem_cgroup *to = mc.to;
> + struct mem_cgroup *from;
> + struct mem_cgroup *to;
> bool ret = false;
> -
> - if (from == mem || to == mem)
> - return true;
> -
> - if (!from || !to || !mem->use_hierarchy)
> - return false;
> -
> - rcu_read_lock();
> - if (css_tryget(&from->css)) {
> - ret = css_is_ancestor(&from->css, &mem->css);
> - css_put(&from->css);
> - }
> - if (!ret && css_tryget(&to->css)) {
> - ret = css_is_ancestor(&to->css, &mem->css);
> + /*
> + * Unlike task_move routines, we access mc.to, mc.from not under
> + * mutual exclusion by cgroup_mutex. Here, we take spinlock instead.
> + */
> + spin_lock(&mc.lock);
> + from = mc.from;
> + to = mc.to;
> + if (!from)
> + goto unlock;
> + if (from == mem || to == mem
> + || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css))
> + || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css)))
> css_put(&to->css);
I think this css_put() must be removed as well.
Other than that, this patch looks good to me.
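
Just to spell out what I mean, with that css_put() dropped I imagine
mem_cgroup_under_move() would end up looking something like this (untested,
only for illustration):

static bool mem_cgroup_under_move(struct mem_cgroup *mem)
{
	struct mem_cgroup *from;
	struct mem_cgroup *to;
	bool ret = false;
	/*
	 * Unlike task_move routines, we access mc.to, mc.from not under
	 * mutual exclusion by cgroup_mutex. Here, we take spinlock instead.
	 */
	spin_lock(&mc.lock);
	from = mc.from;
	to = mc.to;
	if (!from)
		goto unlock;
	if (from == mem || to == mem
	    || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css))
	    || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css)))
		ret = true;
unlock:
	spin_unlock(&mc.lock);
	return ret;
}

i.e. the condition now guards only "ret = true", and we never touch the css
refcount here at all.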

Thank you for cleaning up my dirty code :)

Thanks,
Daisuke Nishimura.

> - }
> - rcu_read_unlock();
> + ret = true;
> +unlock:
> + spin_unlock(&mc.lock);
> return ret;
> }
>
> @@ -4447,11 +4448,13 @@ static int mem_cgroup_precharge_mc(struc
>
> static void mem_cgroup_clear_mc(void)
> {
> + struct mem_cgroup *from = mc.from;
> + struct mem_cgroup *to = mc.to;
> +
> /* we must uncharge all the leftover precharges from mc.to */
> if (mc.precharge) {
> __mem_cgroup_cancel_charge(mc.to, mc.precharge);
> mc.precharge = 0;
> - memcg_oom_recover(mc.to);
> }
> /*
> * we didn't uncharge from mc.from at mem_cgroup_move_account(), so
> @@ -4460,7 +4463,6 @@ static void mem_cgroup_clear_mc(void)
> if (mc.moved_charge) {
> __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
> mc.moved_charge = 0;
> - memcg_oom_recover(mc.from);
> }
> /* we must fixup refcnts and charges */
> if (mc.moved_swap) {
> @@ -4485,9 +4487,13 @@ static void mem_cgroup_clear_mc(void)
>
> mc.moved_swap = 0;
> }
> + spin_lock(&mc.lock);
> mc.from = NULL;
> mc.to = NULL;
> mc.moving_task = NULL;
> + spin_unlock(&mc.lock);
> + memcg_oom_recover(from);
> + memcg_oom_recover(to);
> wake_up_all(&mc.waitq);
> }
>
> @@ -4516,12 +4522,14 @@ static int mem_cgroup_can_attach(struct
> VM_BUG_ON(mc.moved_charge);
> VM_BUG_ON(mc.moved_swap);
> VM_BUG_ON(mc.moving_task);
> + spin_lock(&mc.lock);
> mc.from = from;
> mc.to = mem;
> mc.precharge = 0;
> mc.moved_charge = 0;
> mc.moved_swap = 0;
> mc.moving_task = current;
> + spin_unlock(&mc.lock);
>
> ret = mem_cgroup_precharge_mc(mm);
> if (ret)
>