Re: [patch 8/8] memcg: sanitize __mem_cgroup_try_charge() call protocol

From: Michal Hocko
Date: Wed Mar 12 2014 - 10:02:16 EST


On Tue 11-03-14 21:28:34, Johannes Weiner wrote:
> Some callsites pass a memcg directly, some callsites pass a mm that
> first has to be translated to an mm. This makes for a terrible
> function interface.
>
> Just push the mm-to-memcg translation into the respective callsites
> and always pass a memcg to mem_cgroup_try_charge().
>
> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>

text data bss dec hex filename
39435 5916 4192 49543 c187 mm/memcontrol.o.after
40466 5916 4192 50574 c58e mm/memcontrol.o.before

1K down very nice. But we can shave off additional ~300B if the the
common mm charging helper as I suggested before:

text data bss dec hex filename
39100 5916 4192 49208 c038 mm/memcontrol.o.mm

commit 7aa420bc051849d85dcf5a091f3619c6b8e33cfb
Author: Michal Hocko <mhocko@xxxxxxx>
Date: Wed Mar 12 14:59:06 2014 +0100

add charge mm helper

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2d7aa3e784d9..67e01b27a021 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2757,6 +2757,35 @@ bypass:
return -EINTR;
}

+/**
+ * mem_cgroup_try_charge_mm - try charging a mm
+ * @mm: mm_struct to charge
+ * @nr_pages: number of pages to charge
+ * @oom: trigger OOM if reclaim fails
+ *
+ * Returns the charged mem_cgroup associated with the given mm_struct or
+ * NULL the charge failed.
+ */
+static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
+ gfp_t gfp_mask,
+ unsigned int nr_pages,
+ bool oom)
+
+{
+ struct mem_cgroup *memcg;
+ int ret;
+
+ memcg = get_mem_cgroup_from_mm(mm);
+ ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
+ css_put(&memcg->css);
+ if (ret == -EINTR)
+ memcg = root_mem_cgroup;
+ else if (ret)
+ memcg = NULL;
+
+ return memcg;
+}
+
/*
* Somemtimes we have to undo a charge we got by try_charge().
* This function is for that and do uncharge, put css's refcnt.
@@ -3828,7 +3857,6 @@ int mem_cgroup_newpage_charge(struct page *page,
unsigned int nr_pages = 1;
struct mem_cgroup *memcg;
bool oom = true;
- int ret;

if (mem_cgroup_disabled())
return 0;
@@ -3847,13 +3875,9 @@ int mem_cgroup_newpage_charge(struct page *page,
oom = false;
}

- memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
- css_put(&memcg->css);
- if (ret == -EINTR)
- memcg = root_mem_cgroup;
- else if (ret)
- return ret;
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
+ if (!memcg)
+ return -ENOMEM;
__mem_cgroup_commit_charge(memcg, page, nr_pages,
MEM_CGROUP_CHARGE_TYPE_ANON, false);
return 0;
@@ -3914,15 +3938,10 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
*/
if (!PageSwapCache(page)) {
struct mem_cgroup *memcg;
- int ret;

- memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, gfp_mask, 1, true);
- css_put(&memcg->css);
- if (ret == -EINTR)
- memcg = root_mem_cgroup;
- else if (ret)
- return ret;
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+ if (!memcg)
+ return -ENOMEM;
*memcgp = memcg;
return 0;
}
@@ -3996,13 +4015,9 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
if (unlikely(!mm))
memcg = root_mem_cgroup;
else {
- memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, gfp_mask, 1, true);
- css_put(&memcg->css);
- if (ret == -EINTR)
- memcg = root_mem_cgroup;
- else if (ret)
- return ret;
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+ if (!memcg)
+ return -ENOMEM;
}
__mem_cgroup_commit_charge(memcg, page, 1, type, false);
return 0;

Anyway to your patch as is. The above can be posted as a separate patch
or folded in as you prefer.

Acked-by: Michal Hocko <mhocko@xxxxxxx>

> ---
> mm/memcontrol.c | 184 +++++++++++++++++++++++++-------------------------------
> 1 file changed, 83 insertions(+), 101 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 4f7192bfa5fa..876598b4505b 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2609,7 +2609,7 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
> }
>
>
> -/* See __mem_cgroup_try_charge() for details */
> +/* See mem_cgroup_try_charge() for details */
> enum {
> CHARGE_OK, /* success */
> CHARGE_RETRY, /* need to retry but retry is not bad */
> @@ -2682,45 +2682,34 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
> return CHARGE_NOMEM;
> }
>
> -/*
> - * __mem_cgroup_try_charge() does
> - * 1. detect memcg to be charged against from passed *mm and *ptr,
> - * 2. update res_counter
> - * 3. call memory reclaim if necessary.
> - *
> - * In some special case, if the task is fatal, fatal_signal_pending() or
> - * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
> - * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
> - * as possible without any hazards. 2: all pages should have a valid
> - * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
> - * pointer, that is treated as a charge to root_mem_cgroup.
> - *
> - * So __mem_cgroup_try_charge() will return
> - * 0 ... on success, filling *ptr with a valid memcg pointer.
> - * -ENOMEM ... charge failure because of resource limits.
> - * -EINTR ... if thread is fatal. *ptr is filled with root_mem_cgroup.
> +/**
> + * mem_cgroup_try_charge - try charging a memcg
> + * @memcg: memcg to charge
> + * @nr_pages: number of pages to charge
> + * @oom: trigger OOM if reclaim fails
> *
> - * Unlike the exported interface, an "oom" parameter is added. if oom==true,
> - * the oom-killer can be invoked.
> + * Returns 0 if @memcg was charged successfully, -EINTR if the charge
> + * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
> */
> -static int __mem_cgroup_try_charge(struct mm_struct *mm,
> - gfp_t gfp_mask,
> - unsigned int nr_pages,
> - struct mem_cgroup **ptr,
> - bool oom)
> +static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
> + gfp_t gfp_mask,
> + unsigned int nr_pages,
> + bool oom)
> {
> unsigned int batch = max(CHARGE_BATCH, nr_pages);
> int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
> - struct mem_cgroup *memcg = NULL;
> int ret;
>
> + if (mem_cgroup_is_root(memcg))
> + goto done;
> /*
> - * Unlike gloval-vm's OOM-kill, we're not in memory shortage
> - * in system level. So, allow to go ahead dying process in addition to
> - * MEMDIE process.
> + * Unlike in global OOM situations, memcg is not in a physical
> + * memory shortage. Allow dying and OOM-killed tasks to
> + * bypass the last charges so that they can exit quickly and
> + * free their memory.
> */
> - if (unlikely(test_thread_flag(TIF_MEMDIE)
> - || fatal_signal_pending(current)))
> + if (unlikely(test_thread_flag(TIF_MEMDIE) ||
> + fatal_signal_pending(current)))
> goto bypass;
>
> if (unlikely(task_in_memcg_oom(current)))
> @@ -2729,14 +2718,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
> if (gfp_mask & __GFP_NOFAIL)
> oom = false;
> again:
> - if (*ptr) { /* css should be a valid one */
> - memcg = *ptr;
> - css_get(&memcg->css);
> - } else {
> - memcg = get_mem_cgroup_from_mm(mm);
> - }
> - if (mem_cgroup_is_root(memcg))
> - goto done;
> if (consume_stock(memcg, nr_pages))
> goto done;
>
> @@ -2744,10 +2725,8 @@ again:
> bool invoke_oom = oom && !nr_oom_retries;
>
> /* If killed, bypass charge */
> - if (fatal_signal_pending(current)) {
> - css_put(&memcg->css);
> + if (fatal_signal_pending(current))
> goto bypass;
> - }
>
> ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
> nr_pages, invoke_oom);
> @@ -2756,17 +2735,12 @@ again:
> break;
> case CHARGE_RETRY: /* not in OOM situation but retry */
> batch = nr_pages;
> - css_put(&memcg->css);
> - memcg = NULL;
> goto again;
> case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
> - css_put(&memcg->css);
> goto nomem;
> case CHARGE_NOMEM: /* OOM routine works */
> - if (!oom || invoke_oom) {
> - css_put(&memcg->css);
> + if (!oom || invoke_oom)
> goto nomem;
> - }
> nr_oom_retries--;
> break;
> }
> @@ -2775,16 +2749,11 @@ again:
> if (batch > nr_pages)
> refill_stock(memcg, batch - nr_pages);
> done:
> - css_put(&memcg->css);
> - *ptr = memcg;
> return 0;
> nomem:
> - if (!(gfp_mask & __GFP_NOFAIL)) {
> - *ptr = NULL;
> + if (!(gfp_mask & __GFP_NOFAIL))
> return -ENOMEM;
> - }
> bypass:
> - *ptr = root_mem_cgroup;
> return -EINTR;
> }
>
> @@ -2983,20 +2952,17 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
> static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
> {
> struct res_counter *fail_res;
> - struct mem_cgroup *_memcg;
> int ret = 0;
>
> ret = res_counter_charge(&memcg->kmem, size, &fail_res);
> if (ret)
> return ret;
>
> - _memcg = memcg;
> - ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
> - &_memcg, oom_gfp_allowed(gfp));
> -
> + ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
> + oom_gfp_allowed(gfp));
> if (ret == -EINTR) {
> /*
> - * __mem_cgroup_try_charge() chosed to bypass to root due to
> + * mem_cgroup_try_charge() chosed to bypass to root due to
> * OOM kill or fatal signal. Since our only options are to
> * either fail the allocation or charge it to this cgroup, do
> * it as a temporary condition. But we can't fail. From a
> @@ -3006,7 +2972,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
> *
> * This condition will only trigger if the task entered
> * memcg_charge_kmem in a sane state, but was OOM-killed during
> - * __mem_cgroup_try_charge() above. Tasks that were already
> + * mem_cgroup_try_charge() above. Tasks that were already
> * dying when the allocation triggers should have been already
> * directed to the root cgroup in memcontrol.h
> */
> @@ -3858,8 +3824,8 @@ out:
> int mem_cgroup_newpage_charge(struct page *page,
> struct mm_struct *mm, gfp_t gfp_mask)
> {
> - struct mem_cgroup *memcg = NULL;
> unsigned int nr_pages = 1;
> + struct mem_cgroup *memcg;
> bool oom = true;
> int ret;
>
> @@ -3880,8 +3846,12 @@ int mem_cgroup_newpage_charge(struct page *page,
> oom = false;
> }
>
> - ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
> - if (ret == -ENOMEM)
> + memcg = get_mem_cgroup_from_mm(mm);
> + ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
> + css_put(&memcg->css);
> + if (ret == -EINTR)
> + memcg = root_mem_cgroup;
> + else if (ret)
> return ret;
> __mem_cgroup_commit_charge(memcg, page, nr_pages,
> MEM_CGROUP_CHARGE_TYPE_ANON, false);
> @@ -3899,7 +3869,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
> gfp_t mask,
> struct mem_cgroup **memcgp)
> {
> - struct mem_cgroup *memcg;
> + struct mem_cgroup *memcg = NULL;
> struct page_cgroup *pc;
> int ret;
>
> @@ -3912,31 +3882,29 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
> * in turn serializes uncharging.
> */
> if (PageCgroupUsed(pc))
> - return 0;
> - if (!do_swap_account)
> - goto charge_cur_mm;
> - memcg = try_get_mem_cgroup_from_page(page);
> + goto out;
> + if (do_swap_account)
> + memcg = try_get_mem_cgroup_from_page(page);
> if (!memcg)
> - goto charge_cur_mm;
> - *memcgp = memcg;
> - ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
> + memcg = get_mem_cgroup_from_mm(mm);
> + ret = mem_cgroup_try_charge(memcg, mask, 1, true);
> css_put(&memcg->css);
> if (ret == -EINTR)
> - ret = 0;
> - return ret;
> -charge_cur_mm:
> - ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
> - if (ret == -EINTR)
> - ret = 0;
> - return ret;
> + memcg = root_mem_cgroup;
> + else if (ret)
> + return ret;
> +out:
> + *memcgp = memcg;
> + return 0;
> }
>
> int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
> gfp_t gfp_mask, struct mem_cgroup **memcgp)
> {
> - *memcgp = NULL;
> - if (mem_cgroup_disabled())
> + if (mem_cgroup_disabled()) {
> + *memcgp = NULL;
> return 0;
> + }
> /*
> * A racing thread's fault, or swapoff, may have already
> * updated the pte, and even removed page from swap cache: in
> @@ -3944,12 +3912,18 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
> * there's also a KSM case which does need to charge the page.
> */
> if (!PageSwapCache(page)) {
> + struct mem_cgroup *memcg;
> int ret;
>
> - ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true);
> + memcg = get_mem_cgroup_from_mm(mm);
> + ret = mem_cgroup_try_charge(memcg, gfp_mask, 1, true);
> + css_put(&memcg->css);
> if (ret == -EINTR)
> - ret = 0;
> - return ret;
> + memcg = root_mem_cgroup;
> + else if (ret)
> + return ret;
> + *memcgp = memcg;
> + return 0;
> }
> return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
> }
> @@ -3996,8 +3970,8 @@ void mem_cgroup_commit_charge_swapin(struct page *page,
> int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
> gfp_t gfp_mask)
> {
> - struct mem_cgroup *memcg = NULL;
> enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
> + struct mem_cgroup *memcg;
> int ret;
>
> if (mem_cgroup_disabled())
> @@ -4005,23 +3979,32 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
> if (PageCompound(page))
> return 0;
>
> - if (!PageSwapCache(page)) {
> - /*
> - * Page cache insertions can happen without an actual
> - * task context, e.g. during disk probing on boot.
> - */
> - if (!mm)
> - memcg = root_mem_cgroup;
> - ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
> - if (ret != -ENOMEM)
> - __mem_cgroup_commit_charge(memcg, page, 1, type, false);
> - } else { /* page is swapcache/shmem */
> + if (PageSwapCache(page)) { /* shmem */
> ret = __mem_cgroup_try_charge_swapin(mm, page,
> gfp_mask, &memcg);
> - if (!ret)
> - __mem_cgroup_commit_charge_swapin(page, memcg, type);
> + if (ret)
> + return ret;
> + __mem_cgroup_commit_charge_swapin(page, memcg, type);
> + return 0;
> }
> - return ret;
> +
> + /*
> + * Page cache insertions can happen without an actual mm
> + * context, e.g. during disk probing on boot.
> + */
> + if (unlikely(!mm))
> + memcg = root_mem_cgroup;
> + else {
> + memcg = get_mem_cgroup_from_mm(mm);
> + ret = mem_cgroup_try_charge(memcg, gfp_mask, 1, true);
> + css_put(&memcg->css);
> + if (ret == -EINTR)
> + memcg = root_mem_cgroup;
> + else if (ret)
> + return ret;
> + }
> + __mem_cgroup_commit_charge(memcg, page, 1, type, false);
> + return 0;
> }
>
> static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
> @@ -6635,8 +6618,7 @@ one_by_one:
> batch_count = PRECHARGE_COUNT_AT_ONCE;
> cond_resched();
> }
> - ret = __mem_cgroup_try_charge(NULL,
> - GFP_KERNEL, 1, &memcg, false);
> + ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
> if (ret)
> /* mem_cgroup_clear_mc() will do uncharge later */
> return ret;
> --
> 1.9.0
>

--
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/