Re: [patch 2/2] mm: page-writeback: do not count anon pages as dirtyable memory
From: Michal Hocko
Date: Tue Jan 28 2014 - 08:58:19 EST
On Fri 24-01-14 17:03:04, Johannes Weiner wrote:
> The VM is currently heavily tuned to avoid swapping. Whether that is
> good or bad is a separate discussion, but as long as the VM won't swap
> to make room for dirty cache, we can not consider anonymous pages when
> calculating the amount of dirtyable memory, the baseline to which
> dirty_background_ratio and dirty_ratio are applied.
>
> A simple workload that occupies a significant size (40+%, depending on
> memory layout, storage speeds etc.) of memory with anon/tmpfs pages
> and uses the remainder for a streaming writer demonstrates this
> problem. In that case, the actual cache pages are a small fraction of
> what is considered dirtyable overall, which results in a relatively
> large portion of the cache pages to be dirtied. As kswapd starts
> rotating these, random tasks enter direct reclaim and stall on IO.
>
> Only consider free pages and file pages dirtyable.
>
> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Reviewed-by: Michal Hocko <mhocko@xxxxxxx>
> ---
> include/linux/vmstat.h | 2 --
> mm/internal.h | 1 -
> mm/page-writeback.c | 6 ++++--
> mm/vmscan.c | 23 +----------------------
> 4 files changed, 5 insertions(+), 27 deletions(-)
>
> diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> index e4b948080d20..a67b38415768 100644
> --- a/include/linux/vmstat.h
> +++ b/include/linux/vmstat.h
> @@ -142,8 +142,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
> return x;
> }
>
> -extern unsigned long global_reclaimable_pages(void);
> -
> #ifdef CONFIG_NUMA
> /*
> * Determine the per node value of a stat item. This function
> diff --git a/mm/internal.h b/mm/internal.h
> index 684f7aa9692a..8b6cfd63b5a5 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -85,7 +85,6 @@ extern unsigned long highest_memmap_pfn;
> */
> extern int isolate_lru_page(struct page *page);
> extern void putback_lru_page(struct page *page);
> -extern unsigned long zone_reclaimable_pages(struct zone *zone);
> extern bool zone_reclaimable(struct zone *zone);
>
> /*
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 79cf52b058a7..29e129478644 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -205,7 +205,8 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
> nr_pages = zone_page_state(zone, NR_FREE_PAGES);
> nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
>
> - nr_pages += zone_reclaimable_pages(zone);
> + nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
> + nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
>
> return nr_pages;
> }
> @@ -259,7 +260,8 @@ static unsigned long global_dirtyable_memory(void)
> x = global_page_state(NR_FREE_PAGES);
> x -= min(x, dirty_balance_reserve);
>
> - x += global_reclaimable_pages();
> + x += global_page_state(NR_INACTIVE_FILE);
> + x += global_page_state(NR_ACTIVE_FILE);
>
> if (!vm_highmem_is_dirtyable)
> x -= highmem_dirtyable_memory(x);
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index eea668d9cff6..05e6095159dc 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -147,7 +147,7 @@ static bool global_reclaim(struct scan_control *sc)
> }
> #endif
>
> -unsigned long zone_reclaimable_pages(struct zone *zone)
> +static unsigned long zone_reclaimable_pages(struct zone *zone)
> {
> int nr;
>
> @@ -3297,27 +3297,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
> wake_up_interruptible(&pgdat->kswapd_wait);
> }
>
> -/*
> - * The reclaimable count would be mostly accurate.
> - * The less reclaimable pages may be
> - * - mlocked pages, which will be moved to unevictable list when encountered
> - * - mapped pages, which may require several travels to be reclaimed
> - * - dirty pages, which is not "instantly" reclaimable
> - */
> -unsigned long global_reclaimable_pages(void)
> -{
> - int nr;
> -
> - nr = global_page_state(NR_ACTIVE_FILE) +
> - global_page_state(NR_INACTIVE_FILE);
> -
> - if (get_nr_swap_pages() > 0)
> - nr += global_page_state(NR_ACTIVE_ANON) +
> - global_page_state(NR_INACTIVE_ANON);
> -
> - return nr;
> -}
> -
> #ifdef CONFIG_HIBERNATION
> /*
> * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
> --
> 1.8.4.2
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@xxxxxxxxxx For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>
--
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/