Re: mm: pages are not freed from lru_add_pvecs after process termination

From: Michal Hocko
Date: Mon May 02 2016 - 09:00:24 EST


On Thu 28-04-16 16:37:10, Michal Hocko wrote:
[...]
> 7. Hook into vmstat and flush from there? This would drain them
> periodically but it would also introduce an undeterministic interference
> as well.

So I have given this a try (not tested yet) and it doesn't look terribly
complicated. It is hijacking vmstat for a purpose it wasn't intended for
originally but creating a dedicated kernel thread/WQ sounds like
overkill to me. Does this help, or do we have to be more aggressive and
wake up the shepherd from the allocator slow path? Could you give it a try
please?
---
diff --git a/mm/internal.h b/mm/internal.h
index b6ead95a0184..876125bd11f4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -488,4 +488,5 @@ extern const struct trace_print_flags pageflag_names[];
extern const struct trace_print_flags vmaflag_names[];
extern const struct trace_print_flags gfpflag_names[];

+extern bool pcp_lru_add_need_drain(int cpu);
#endif /* __MM_INTERNAL_H */
diff --git a/mm/swap.c b/mm/swap.c
index 95916142fc46..3937e6caef96 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -667,6 +667,15 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)

static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);

+bool pcp_lru_add_need_drain(int cpu)
+{
+ return pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
+ pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
+ pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
+ pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
+ need_activate_page_drain(cpu);
+}
+
void lru_add_drain_all(void)
{
static DEFINE_MUTEX(lock);
@@ -680,11 +689,7 @@ void lru_add_drain_all(void)
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);

- if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
- pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
- pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
- pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
- need_activate_page_drain(cpu)) {
+ if (pcp_lru_add_need_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
schedule_work_on(cpu, work);
cpumask_set_cpu(cpu, &has_work);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7397d9548f21..766f751e3467 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -479,6 +479,13 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
int changes = 0;

+ /*
+ * Drain LRU pcp caches only when do_pagesets is set because the
+ * drain might be expensive - we take locks there etc.
+ */
+ if (do_pagesets && pcp_lru_add_need_drain(smp_processor_id()))
+ lru_add_drain();
+
for_each_populated_zone(zone) {
struct per_cpu_pageset __percpu *p = zone->pageset;

@@ -1477,7 +1484,8 @@ static bool need_update(int cpu)
return true;

}
- return false;
+
+ return pcp_lru_add_need_drain(cpu);
}

void quiet_vmstat(void)
--
Michal Hocko
SUSE Labs