Re: [PATCH 6/7] mm/memcg: rework inactive_ratio calculation

From: KAMEZAWA Hiroyuki
Date: Fri Mar 02 2012 - 00:32:38 EST


On Wed, 29 Feb 2012 13:16:00 +0400
Konstantin Khlebnikov <khlebnikov@xxxxxxxxxx> wrote:

> This patch removes precalculated zone->inactive_ratio.
> Now it always calculated in inactive_anon_is_low() from current lru size.
> After that we can merge memcg and non-memcg cases and drop duplicated code.
>
> We can drop precalculated ratio, because its calculation fast enough to do it
> each time. Plus precalculation uses zone size as basis, this estimation not
> always match with page lru size, for example if a significant proportion
> of memory occupied by kernel objects.
>
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxx>

Maybe good....but please don't change the user interface /proc/zoneinfo implicitly.
How about calculating inactive_ratio at reading /proc/zoneinfo ?

Thanks,
-Kame




> ---
> include/linux/memcontrol.h | 16 --------
> include/linux/mmzone.h | 7 ----
> mm/memcontrol.c | 38 -------------------
> mm/page_alloc.c | 44 ----------------------
> mm/vmscan.c | 88 ++++++++++++++++++++++++++++----------------
> mm/vmstat.c | 6 +--
> 6 files changed, 58 insertions(+), 141 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index e2e1fac..7e114f8 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -117,10 +117,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
> /*
> * For memory reclaim.
> */
> -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
> - struct zone *zone);
> -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
> - struct zone *zone);
> int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
> unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
> int nid, int zid, unsigned int lrumask);
> @@ -334,18 +330,6 @@ static inline bool mem_cgroup_disabled(void)
> return true;
> }
>
> -static inline int
> -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
> -{
> - return 1;
> -}
> -
> -static inline int
> -mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
> -{
> - return 1;
> -}
> -
> static inline unsigned long
> mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
> unsigned int lru_mask)
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index fdcd683..7edcf17 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -384,13 +384,6 @@ struct zone {
> /* Zone statistics */
> atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
>
> - /*
> - * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
> - * this zone's LRU. Maintained by the pageout code.
> - */
> - unsigned int inactive_ratio;
> -
> -
> ZONE_PADDING(_pad2_)
> /* Rarely used or read-mostly fields */
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 2809531..4bc6835 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1171,44 +1171,6 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
> return ret;
> }
>
> -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
> -{
> - unsigned long inactive_ratio;
> - int nid = zone_to_nid(zone);
> - int zid = zone_idx(zone);
> - unsigned long inactive;
> - unsigned long active;
> - unsigned long gb;
> -
> - inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
> - BIT(LRU_INACTIVE_ANON));
> - active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
> - BIT(LRU_ACTIVE_ANON));
> -
> - gb = (inactive + active) >> (30 - PAGE_SHIFT);
> - if (gb)
> - inactive_ratio = int_sqrt(10 * gb);
> - else
> - inactive_ratio = 1;
> -
> - return inactive * inactive_ratio < active;
> -}
> -
> -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
> -{
> - unsigned long active;
> - unsigned long inactive;
> - int zid = zone_idx(zone);
> - int nid = zone_to_nid(zone);
> -
> - inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
> - BIT(LRU_INACTIVE_FILE));
> - active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
> - BIT(LRU_ACTIVE_FILE));
> -
> - return (active > inactive);
> -}
> -
> struct zone_reclaim_stat *
> mem_cgroup_get_reclaim_stat_from_page(struct page *page)
> {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index ea40034..2e90931 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5051,49 +5051,6 @@ void setup_per_zone_wmarks(void)
> }
>
> /*
> - * The inactive anon list should be small enough that the VM never has to
> - * do too much work, but large enough that each inactive page has a chance
> - * to be referenced again before it is swapped out.
> - *
> - * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
> - * INACTIVE_ANON pages on this zone's LRU, maintained by the
> - * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
> - * the anonymous pages are kept on the inactive list.
> - *
> - * total target max
> - * memory ratio inactive anon
> - * -------------------------------------
> - * 10MB 1 5MB
> - * 100MB 1 50MB
> - * 1GB 3 250MB
> - * 10GB 10 0.9GB
> - * 100GB 31 3GB
> - * 1TB 101 10GB
> - * 10TB 320 32GB
> - */
> -static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
> -{
> - unsigned int gb, ratio;
> -
> - /* Zone size in gigabytes */
> - gb = zone->present_pages >> (30 - PAGE_SHIFT);
> - if (gb)
> - ratio = int_sqrt(10 * gb);
> - else
> - ratio = 1;
> -
> - zone->inactive_ratio = ratio;
> -}
> -
> -static void __meminit setup_per_zone_inactive_ratio(void)
> -{
> - struct zone *zone;
> -
> - for_each_zone(zone)
> - calculate_zone_inactive_ratio(zone);
> -}
> -
> -/*
> * Initialise min_free_kbytes.
> *
> * For small machines we want it small (128k min). For large machines
> @@ -5131,7 +5088,6 @@ int __meminit init_per_zone_wmark_min(void)
> setup_per_zone_wmarks();
> refresh_zone_stat_thresholds();
> setup_per_zone_lowmem_reserve();
> - setup_per_zone_inactive_ratio();
> return 0;
> }
> module_init(init_per_zone_wmark_min)
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index fe00a22..ab447df 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1750,29 +1750,38 @@ static void shrink_active_list(unsigned long nr_to_scan,
> }
>
> #ifdef CONFIG_SWAP
> -static int inactive_anon_is_low_global(struct zone *zone)
> -{
> - unsigned long active, inactive;
> -
> - active = zone_page_state(zone, NR_ACTIVE_ANON);
> - inactive = zone_page_state(zone, NR_INACTIVE_ANON);
> -
> - if (inactive * zone->inactive_ratio < active)
> - return 1;
> -
> - return 0;
> -}
> -
> /**
> * inactive_anon_is_low - check if anonymous pages need to be deactivated
> * @zone: zone to check
> - * @sc: scan control of this context
> *
> * Returns true if the zone does not have enough inactive anon pages,
> * meaning some active anon pages need to be deactivated.
> + *
> + * The inactive anon list should be small enough that the VM never has to
> + * do too much work, but large enough that each inactive page has a chance
> + * to be referenced again before it is swapped out.
> + *
> + * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
> + * INACTIVE_ANON pages on this zone's LRU, maintained by the
> + * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
> + * the anonymous pages are kept on the inactive list.
> + *
> + * total target max
> + * memory ratio inactive anon
> + * -------------------------------------
> + * 10MB 1 5MB
> + * 100MB 1 50MB
> + * 1GB 3 250MB
> + * 10GB 10 0.9GB
> + * 100GB 31 3GB
> + * 1TB 101 10GB
> + * 10TB 320 32GB
> */
> static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
> {
> + unsigned long active, inactive;
> + unsigned int gb, ratio;
> +
> /*
> * If we don't have swap space, anonymous page deactivation
> * is pointless.
> @@ -1780,11 +1789,26 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
> if (!total_swap_pages)
> return 0;
>
> - if (!mem_cgroup_disabled())
> - return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
> - mz->zone);
> + if (mem_cgroup_disabled()) {
> + active = zone_page_state(mz->zone, NR_ACTIVE_ANON);
> + inactive = zone_page_state(mz->zone, NR_INACTIVE_ANON);
> + } else {
> + active = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
> + zone_to_nid(mz->zone), zone_idx(mz->zone),
> + BIT(LRU_ACTIVE_ANON));
> + inactive = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
> + zone_to_nid(mz->zone), zone_idx(mz->zone),
> + BIT(LRU_INACTIVE_ANON));
> + }
> +
> + /* Total size in gigabytes */
> + gb = (active + inactive) >> (30 - PAGE_SHIFT);
> + if (gb)
> + ratio = int_sqrt(10 * gb);
> + else
> + ratio = 1;
>
> - return inactive_anon_is_low_global(mz->zone);
> + return inactive * ratio < active;
> }
> #else
> static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
> @@ -1793,16 +1817,6 @@ static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
> }
> #endif
>
> -static int inactive_file_is_low_global(struct zone *zone)
> -{
> - unsigned long active, inactive;
> -
> - active = zone_page_state(zone, NR_ACTIVE_FILE);
> - inactive = zone_page_state(zone, NR_INACTIVE_FILE);
> -
> - return (active > inactive);
> -}
> -
> /**
> * inactive_file_is_low - check if file pages need to be deactivated
> * @mz: memory cgroup and zone to check
> @@ -1819,11 +1833,21 @@ static int inactive_file_is_low_global(struct zone *zone)
> */
> static int inactive_file_is_low(struct mem_cgroup_zone *mz)
> {
> - if (!mem_cgroup_disabled())
> - return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
> - mz->zone);
> + unsigned long active, inactive;
> +
> + if (mem_cgroup_disabled()) {
> + active = zone_page_state(mz->zone, NR_ACTIVE_FILE);
> + inactive = zone_page_state(mz->zone, NR_INACTIVE_FILE);
> + } else {
> + active = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
> + zone_to_nid(mz->zone), zone_idx(mz->zone),
> + BIT(LRU_ACTIVE_FILE));
> + inactive = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
> + zone_to_nid(mz->zone), zone_idx(mz->zone),
> + BIT(LRU_INACTIVE_FILE));
> + }
>
> - return inactive_file_is_low_global(mz->zone);
> + return inactive < active;
> }
>
> static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index f600557..2c813e1 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1017,11 +1017,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
> }
> seq_printf(m,
> "\n all_unreclaimable: %u"
> - "\n start_pfn: %lu"
> - "\n inactive_ratio: %u",
> + "\n start_pfn: %lu",
> zone->all_unreclaimable,
> - zone->zone_start_pfn,
> - zone->inactive_ratio);
> + zone->zone_start_pfn);
> seq_putc(m, '\n');
> }
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/