[patch] mm: vmscan: treat memcg low limit as hard guarantee

From: Johannes Weiner
Date: Wed May 28 2014 - 09:37:05 EST


Don't hide low limit configuration problems behind weak semantics
that quietly breach the configured guarantees.

Make it simple: memory protected by a memcg guarantee is treated the
same as mlocked memory, anonymous memory without swap, kernel memory,
pinned memory, etc. - it is unreclaimable. If no memory can be
reclaimed without breaching a guarantee, that is a genuine
configuration problem, so let the machine OOM and dump the memory
state in that situation.
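
For illustration, the hierarchical check that reclaim now honors
unconditionally can be modeled in userspace as below. This is only a
sketch of the semantics - the struct, field names and the exact
low-limit comparison are stand-ins, not the kernel data structures -
but the parent walk up to the reclaim root mirrors
mem_cgroup_within_guarantee() in the hunk further down:

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model of the guarantee check, not kernel code: a group is
 * protected if it, or any ancestor up to and including the reclaim
 * root, is still below its configured low limit.
 */
struct memcg {
	struct memcg *parent;
	unsigned long usage;
	unsigned long low_limit;
};

static bool within_guarantee(struct memcg *memcg, struct memcg *root)
{
	do {
		if (memcg->usage < memcg->low_limit)
			return true;
		if (memcg == root)
			break;
	} while ((memcg = memcg->parent));

	return false;
}

int main(void)
{
	struct memcg root  = { .parent = NULL,  .usage = 900, .low_limit = 0   };
	struct memcg child = { .parent = &root, .usage = 300, .low_limit = 512 };

	/* child is below its low limit: reclaim skips it outright */
	printf("child protected: %d\n", within_guarantee(&child, &root));
	/* root has no guarantee of its own and no protected ancestor */
	printf("root protected:  %d\n", within_guarantee(&root, &root));
	return 0;
}

The point of the sketch is the missing second pass: once a group (or
any ancestor up to the reclaim root) sits below its low limit it is
simply skipped, and if that leaves nothing reclaimable the result is
an OOM kill rather than a retry that ignores the guarantee.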

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
include/linux/memcontrol.h | 5 -----
mm/memcontrol.c | 15 ---------------
mm/vmscan.c | 41 +++++------------------------------------
3 files changed, 5 insertions(+), 56 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a5cf853129ec..c3a53cbb88eb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -94,7 +94,6 @@ bool task_in_mem_cgroup(struct task_struct *task,

extern bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
struct mem_cgroup *root);
-extern bool mem_cgroup_all_within_guarantee(struct mem_cgroup *root);

extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -297,10 +296,6 @@ static inline bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
{
return false;
}
-static inline bool mem_cgroup_all_within_guarantee(struct mem_cgroup *root)
-{
- return false;
-}

static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4df733e13727..85fdef53fcf1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2788,7 +2788,6 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
*
* The given group is within its reclaim gurantee if it is below its low limit
* or the same applies for any parent up the hierarchy until root (including).
- * Such a group might be excluded from the reclaim.
*/
bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
struct mem_cgroup *root)
@@ -2801,25 +2800,11 @@ bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
return true;
if (memcg == root)
break;
-
} while ((memcg = parent_mem_cgroup(memcg)));

return false;
}

-bool mem_cgroup_all_within_guarantee(struct mem_cgroup *root)
-{
- struct mem_cgroup *iter;
-
- for_each_mem_cgroup_tree(iter, root)
- if (!mem_cgroup_within_guarantee(iter, root)) {
- mem_cgroup_iter_break(root, iter);
- return false;
- }
-
- return true;
-}
-
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
struct mem_cgroup *memcg = NULL;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8ffe4e616fe..c72493e8fb53 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2244,20 +2244,14 @@ static inline bool should_continue_reclaim(struct zone *zone,
}

/**
- * __shrink_zone - shrinks a given zone
+ * shrink_zone - shrinks a given zone
*
* @zone: zone to shrink
* @sc: scan control with additional reclaim parameters
- * @honor_memcg_guarantee: do not reclaim memcgs which are within their memory
- * guarantee
- *
- * Returns the number of reclaimed memcgs.
*/
-static unsigned __shrink_zone(struct zone *zone, struct scan_control *sc,
- bool honor_memcg_guarantee)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_reclaimed, nr_scanned;
- unsigned nr_scanned_groups = 0;

do {
struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2274,20 +2268,16 @@ static unsigned __shrink_zone(struct zone *zone, struct scan_control *sc,
do {
struct lruvec *lruvec;

- /* Memcg might be protected from the reclaim */
- if (honor_memcg_guarantee &&
- mem_cgroup_within_guarantee(memcg, root)) {
+ /* Don't reclaim guaranteed memory */
+ if (mem_cgroup_within_guarantee(memcg, root)) {
/*
- * It would be more optimal to skip the memcg
- * subtree now but we do not have a memcg iter
- * helper for that. Anyone?
+ * XXX: skip the entire subtree here
*/
memcg = mem_cgroup_iter(root, memcg, &reclaim);
continue;
}

lruvec = mem_cgroup_zone_lruvec(zone, memcg);
- nr_scanned_groups++;

sc->swappiness = mem_cgroup_swappiness(memcg);
shrink_lruvec(lruvec, sc);
@@ -2316,27 +2306,6 @@ static unsigned __shrink_zone(struct zone *zone, struct scan_control *sc,

} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
-
- return nr_scanned_groups;
-}
-
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
- bool honor_guarantee = true;
-
- while (!__shrink_zone(zone, sc, honor_guarantee)) {
- /*
- * The previous round of reclaim didn't find anything to scan
- * because
- * a) the whole reclaimed hierarchy is within guarantee so
- * we fallback to ignore the guarantee because other option
- * would be the OOM
- * b) multiple reclaimers are racing and so the first round
- * should be retried
- */
- if (mem_cgroup_all_within_guarantee(sc->target_mem_cgroup))
- honor_guarantee = false;
- }
}

/* Returns true if compaction should go ahead for a high-order request */
--
1.9.3
