[PATCH] mm: refactor inactive_list_is_low

From: Minchan Kim
Date: Thu Jan 12 2017 - 20:13:36 EST


Recently, Michal Hocko added a tracepoint to inactive_list_is_low
to catch why the VM decided to age the active list, which helps
understand active/inactive balancing problems. Unfortunately, that
change also added a "bool trace" parameter to inactive_list_is_low
to suppress tracing at some call sites. That is not elegant, so
this patch cleans it up.

Most callers of inactive_list_is_low use it to decide whether to
demote pages from the active list, but one site (i.e.,
get_scan_count) uses it for a different purpose: to reclaim the
file LRU forcefully. Every deactivation site pairs the check with
shrink_active_list, so inactive_list_is_low can be moved into
shrink_active_list itself.
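
Concretely, the check becomes an early return at the top of
shrink_active_list(), where the tracepoint now fires
unconditionally (a simplified sketch of the new structure, not the
exact code):

    static void shrink_active_list(...)
    {
            ...
            deactivate = inactive_list_is_low(file, inactive, active);
            trace_mm_vmscan_inactive_list_is_low(...);
            if (!deactivate)
                    return;
            /* carry on with deactivation */
    }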

One more thing this patch does is remove "ratio" from the
tracepoint, because it can be recomputed by post-processing the
trace in a script with simple math.
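
For example, assuming 4K pages (PAGE_SHIFT == 12), the ratio can
be rebuilt from the emitted inactive/active counts with the same
math the kernel uses (illustrative only):

    gb = (inactive + active) >> (30 - PAGE_SHIFT); /* >> 18 with 4K pages */
    inactive_ratio = gb ? int_sqrt(10 * gb) : 1;
    /* the active list is aged when inactive * inactive_ratio < active */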

Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---
include/trace/events/vmscan.h | 9 +++-----
mm/vmscan.c | 51 ++++++++++++++++++++++++-------------------
2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 27e8a5c..406ea95 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -432,9 +432,9 @@ TRACE_EVENT(mm_vmscan_inactive_list_is_low,
TP_PROTO(int nid, int reclaim_idx,
unsigned long total_inactive, unsigned long inactive,
unsigned long total_active, unsigned long active,
- unsigned long ratio, int file),
+ int file),

- TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file),
+ TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, file),

TP_STRUCT__entry(
__field(int, nid)
@@ -443,7 +443,6 @@ TRACE_EVENT(mm_vmscan_inactive_list_is_low,
__field(unsigned long, inactive)
__field(unsigned long, total_active)
__field(unsigned long, active)
- __field(unsigned long, ratio)
__field(int, reclaim_flags)
),

@@ -454,16 +453,14 @@ TRACE_EVENT(mm_vmscan_inactive_list_is_low,
__entry->inactive = inactive;
__entry->total_active = total_active;
__entry->active = active;
- __entry->ratio = ratio;
__entry->reclaim_flags = trace_shrink_flags(file) & RECLAIM_WB_LRU;
),

- TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld ratio=%ld flags=%s",
+ TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld flags=%s",
__entry->nid,
__entry->reclaim_idx,
__entry->total_inactive, __entry->inactive,
__entry->total_active, __entry->active,
- __entry->ratio,
show_reclaim_flags(__entry->reclaim_flags))
);
#endif /* _TRACE_VMSCAN_H */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 75cdf68..6890c21 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -150,6 +150,7 @@ unsigned long vm_total_pages;

static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
+static bool inactive_list_is_low(bool file, unsigned long inactive, unsigned long active);

#ifdef CONFIG_MEMCG
static bool global_reclaim(struct scan_control *sc)
@@ -1962,6 +1963,22 @@ static void shrink_active_list(unsigned long nr_to_scan,
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ unsigned long inactive, active;
+ enum lru_list inactive_lru = file * LRU_FILE;
+ enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
+ bool deactivate;
+
+ inactive = lruvec_lru_size_eligibe_zones(lruvec, inactive_lru,
+ sc->reclaim_idx);
+ active = lruvec_lru_size_eligibe_zones(lruvec, active_lru,
+ sc->reclaim_idx);
+ deactivate = inactive_list_is_low(file, inactive, active);
+ trace_mm_vmscan_inactive_list_is_low(pgdat->node_id,
+ sc->reclaim_idx,
+ lruvec_lru_size(lruvec, inactive_lru), inactive,
+ lruvec_lru_size(lruvec, active_lru), active, file);
+ if (!deactivate)
+ return;

lru_add_drain();

@@ -2073,13 +2090,10 @@ static void shrink_active_list(unsigned long nr_to_scan,
* 1TB 101 10GB
* 10TB 320 32GB
*/
-static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
- struct scan_control *sc, bool trace)
+static bool inactive_list_is_low(bool file,
+ unsigned long inactive, unsigned long active)
{
unsigned long inactive_ratio;
- unsigned long inactive, active;
- enum lru_list inactive_lru = file * LRU_FILE;
- enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
unsigned long gb;

/*
@@ -2089,22 +2103,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
if (!file && !total_swap_pages)
return false;

- inactive = lruvec_lru_size_eligibe_zones(lruvec, inactive_lru, sc->reclaim_idx);
- active = lruvec_lru_size_eligibe_zones(lruvec, active_lru, sc->reclaim_idx);
-
gb = (inactive + active) >> (30 - PAGE_SHIFT);
if (gb)
inactive_ratio = int_sqrt(10 * gb);
else
inactive_ratio = 1;

- if (trace)
- trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id,
- sc->reclaim_idx,
- lruvec_lru_size(lruvec, inactive_lru), inactive,
- lruvec_lru_size(lruvec, active_lru), active,
- inactive_ratio, file);
-
return inactive * inactive_ratio < active;
}

@@ -2112,8 +2116,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
- shrink_active_list(nr_to_scan, lruvec, sc, lru);
+ shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}

@@ -2153,6 +2156,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
enum lru_list lru;
bool some_scanned;
int pass;
+ unsigned long inactive, active;

/*
* If the zone or memcg is small, nr[l] can be 0. This
@@ -2243,7 +2247,11 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true, sc, false) &&
+ inactive = lruvec_lru_size_eligibe_zones(lruvec,
+ LRU_INACTIVE_FILE, sc->reclaim_idx);
+ active = lruvec_lru_size_eligibe_zones(lruvec,
+ LRU_ACTIVE_FILE, sc->reclaim_idx);
+ if (!inactive_list_is_low(true, inactive, active) &&
lruvec_lru_size_eligibe_zones(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
@@ -2468,9 +2476,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false, sc, true))
- shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
- sc, LRU_ACTIVE_ANON);
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON);
}

/* Use reclaim/compaction for costly allocs or under memory pressure */
@@ -3118,8 +3124,7 @@ static void age_active_anon(struct pglist_data *pgdat,
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);

- if (inactive_list_is_low(lruvec, false, sc, true))
- shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);

memcg = mem_cgroup_iter(NULL, memcg, NULL);
--
2.7.4