Re: Found the commit that causes the OOMs

From: Minchan Kim
Date: Sun Jun 28 2009 - 12:50:31 EST


Looks good.

David, Can you test with this patch ?

On Mon, Jun 29, 2009 at 12:10 AM, Wu Fengguang<fengguang.wu@xxxxxxxxx> wrote:
> On Sun, Jun 28, 2009 at 11:01:40PM +0800, KOSAKI Motohiro wrote:
>> > Yes, smaller inactive_anon means smaller (pointless) nr_scanned,
>> > and therefore less slab scans. Strictly speaking, it's not the fault
>> > of your patch. It indicates that the slab scan ratio algorithm should
>> > be updated too :)
>>
>> I don't think this patch is related to minchan's patch.
>> but I think this patch is good.
>
> OK.
>
>>
>> > We could refine the estimation of "reclaimable" pages like this:
>>
>> hmhm, reasonable idea.
>
> Thank you.
>
>> >
>> > diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
>> > index 416f748..e9c5b0e 100644
>> > --- a/include/linux/vmstat.h
>> > +++ b/include/linux/vmstat.h
>> > @@ -167,14 +167,7 @@ static inline unsigned long zone_page_state(struct zone *zone,
>> > Â}
>> >
>> > Âextern unsigned long global_lru_pages(void);
>> > -
>> > -static inline unsigned long zone_lru_pages(struct zone *zone)
>> > -{
>> > - Â Â Â return (zone_page_state(zone, NR_ACTIVE_ANON)
>> > - Â Â Â Â Â Â Â + zone_page_state(zone, NR_ACTIVE_FILE)
>> > - Â Â Â Â Â Â Â + zone_page_state(zone, NR_INACTIVE_ANON)
>> > - Â Â Â Â Â Â Â + zone_page_state(zone, NR_INACTIVE_FILE));
>> > -}
>> > +extern unsigned long zone_lru_pages(struct zone *zone);
>> >
>> > Â#ifdef CONFIG_NUMA
>> > Â/*
>> > diff --git a/mm/vmscan.c b/mm/vmscan.c
>> > index 026f452..4281c6f 100644
>> > --- a/mm/vmscan.c
>> > +++ b/mm/vmscan.c
>> > @@ -2123,10 +2123,31 @@ void wakeup_kswapd(struct zone *zone, int order)
>> >
>> > Âunsigned long global_lru_pages(void)
>> > Â{
>> > - Â Â Â return global_page_state(NR_ACTIVE_ANON)
>> > - Â Â Â Â Â Â Â + global_page_state(NR_ACTIVE_FILE)
>> > - Â Â Â Â Â Â Â + global_page_state(NR_INACTIVE_ANON)
>> > - Â Â Â Â Â Â Â + global_page_state(NR_INACTIVE_FILE);
>> > +       int nr;
>> > +
>> > +       nr = global_page_state(NR_ACTIVE_FILE) +
>> > +            global_page_state(NR_INACTIVE_FILE);
>> > +
>> > +       if (total_swap_pages)
>> > +               nr += global_page_state(NR_ACTIVE_ANON) +
>> > +                     global_page_state(NR_INACTIVE_ANON);
>> > +
>> > +       return nr;
>> > +}
>>
>> Please change function name too.
>> Now, this function only account reclaimable pages.
>
> Good suggestion - I did consider renaming them to *_reclaimable_pages.
>
>> Plus, total_swap_pages is bad. if we need to concern "reclaimable
>> pages", we should use nr_swap_pages.
>
>> I mean, swap-full also makes anon unreclaimable although the system
>> has some swap device.
>
> Right, changed to (nr_swap_pages > 0).
>
> Thanks,
> Fengguang
> ---
>
> diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> index 416f748..8d8aa20 100644
> --- a/include/linux/vmstat.h
> +++ b/include/linux/vmstat.h
> @@ -166,15 +166,8 @@ static inline unsigned long zone_page_state(struct zone *zone,
> Â Â Â Âreturn x;
> Â}
>
> -extern unsigned long global_lru_pages(void);
> -
> -static inline unsigned long zone_lru_pages(struct zone *zone)
> -{
> - Â Â Â return (zone_page_state(zone, NR_ACTIVE_ANON)
> - Â Â Â Â Â Â Â + zone_page_state(zone, NR_ACTIVE_FILE)
> - Â Â Â Â Â Â Â + zone_page_state(zone, NR_INACTIVE_ANON)
> - Â Â Â Â Â Â Â + zone_page_state(zone, NR_INACTIVE_FILE));
> -}
> +extern unsigned long global_reclaimable_pages(void);
> +extern unsigned long zone_reclaimable_pages(struct zone *zone);
>
> Â#ifdef CONFIG_NUMA
> Â/*
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index a91b870..74c3067 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -394,7 +394,8 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
> Â Â Â Â Â Â Â Âstruct zone *z =
> Â Â Â Â Â Â Â Â Â Â Â Â&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
>
> - Â Â Â Â Â Â Â x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
> + Â Â Â Â Â Â Â x += zone_page_state(z, NR_FREE_PAGES) +
> + Â Â Â Â Â Â Â Â Â Âzone_reclaimable_pages(z);
> Â Â Â Â}
> Â Â Â Â/*
> Â Â Â Â * Make sure that the number of highmem pages is never larger
> @@ -418,7 +419,7 @@ unsigned long determine_dirtyable_memory(void)
> Â{
> Â Â Â Âunsigned long x;
>
> - Â Â Â x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
> + Â Â Â x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
>
> Â Â Â Âif (!vm_highmem_is_dirtyable)
> Â Â Â Â Â Â Â Âx -= highmem_dirtyable_memory(x);
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 026f452..3768332 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1693,7 +1693,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
> Â Â Â Â Â Â Â Â Â Â Â Âif (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âcontinue;
>
> - Â Â Â Â Â Â Â Â Â Â Â lru_pages += zone_lru_pages(zone);
> + Â Â Â Â Â Â Â Â Â Â Â lru_pages += zone_reclaimable_pages(zone);
> Â Â Â Â Â Â Â Â}
> Â Â Â Â}
>
> @@ -1910,7 +1910,7 @@ loop_again:
> Â Â Â Â Â Â Â Âfor (i = 0; i <= end_zone; i++) {
> Â Â Â Â Â Â Â Â Â Â Â Âstruct zone *zone = pgdat->node_zones + i;
>
> - Â Â Â Â Â Â Â Â Â Â Â lru_pages += zone_lru_pages(zone);
> + Â Â Â Â Â Â Â Â Â Â Â lru_pages += zone_reclaimable_pages(zone);
> Â Â Â Â Â Â Â Â}
>
> Â Â Â Â Â Â Â Â/*
> @@ -1954,7 +1954,7 @@ loop_again:
> Â Â Â Â Â Â Â Â Â Â Â Âif (zone_is_all_unreclaimable(zone))
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âcontinue;
> Â Â Â Â Â Â Â Â Â Â Â Âif (nr_slab == 0 && zone->pages_scanned >=
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â (zone_lru_pages(zone) * 6))
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â (zone_reclaimable_pages(zone) * 6))
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âzone_set_flag(zone,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ÂZONE_ALL_UNRECLAIMABLE);
> Â Â Â Â Â Â Â Â Â Â Â Â/*
> @@ -2121,12 +2121,33 @@ void wakeup_kswapd(struct zone *zone, int order)
> Â Â Â Âwake_up_interruptible(&pgdat->kswapd_wait);
> Â}
>
> -unsigned long global_lru_pages(void)
> +unsigned long global_reclaimable_pages(void)
> Â{
> - Â Â Â return global_page_state(NR_ACTIVE_ANON)
> - Â Â Â Â Â Â Â + global_page_state(NR_ACTIVE_FILE)
> - Â Â Â Â Â Â Â + global_page_state(NR_INACTIVE_ANON)
> - Â Â Â Â Â Â Â + global_page_state(NR_INACTIVE_FILE);
> +       int nr;
> +
> +       nr = global_page_state(NR_ACTIVE_FILE) +
> +            global_page_state(NR_INACTIVE_FILE);
> +
> +       if (nr_swap_pages > 0)
> +               nr += global_page_state(NR_ACTIVE_ANON) +
> +                     global_page_state(NR_INACTIVE_ANON);
> +
> +       return nr;
> +}
> +
> +
> +unsigned long zone_reclaimable_pages(struct zone *zone)
> +{
> + Â Â Â int nr;
> +
> + Â Â Â nr = zone_page_state(zone, NR_ACTIVE_FILE) +
> + Â Â Â Â Â Âzone_page_state(zone, NR_INACTIVE_FILE);
> +
> + Â Â Â if (nr_swap_pages > 0)
> + Â Â Â Â Â Â Â nr += zone_page_state(zone, NR_ACTIVE_ANON) +
> + Â Â Â Â Â Â Â Â Â Â zone_page_state(zone, NR_INACTIVE_ANON);
> +
> + Â Â Â return nr;
> Â}
>
> Â#ifdef CONFIG_HIBERNATION
> @@ -2198,7 +2219,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
>
> Â Â Â Âcurrent->reclaim_state = &reclaim_state;
>
> - Â Â Â lru_pages = global_lru_pages();
> + Â Â Â lru_pages = global_reclaimable_pages();
> Â Â Â Ânr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
> Â Â Â Â/* If slab caches are huge, it's better to hit them first */
> Â Â Â Âwhile (nr_slab >= lru_pages) {
> @@ -2240,7 +2261,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
>
> Â Â Â Â Â Â Â Â Â Â Â Âreclaim_state.reclaimed_slab = 0;
> Â Â Â Â Â Â Â Â Â Â Â Âshrink_slab(sc.nr_scanned, sc.gfp_mask,
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â global_lru_pages());
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â global_reclaimable_pages());
> Â Â Â Â Â Â Â Â Â Â Â Âsc.nr_reclaimed += reclaim_state.reclaimed_slab;
> Â Â Â Â Â Â Â Â Â Â Â Âif (sc.nr_reclaimed >= nr_pages)
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âgoto out;
> @@ -2257,7 +2278,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
> Â Â Â Âif (!sc.nr_reclaimed) {
> Â Â Â Â Â Â Â Âdo {
> Â Â Â Â Â Â Â Â Â Â Â Âreclaim_state.reclaimed_slab = 0;
> - Â Â Â Â Â Â Â Â Â Â Â shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
> + Â Â Â Â Â Â Â Â Â Â Â shrink_slab(nr_pages, sc.gfp_mask,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â global_reclaimable_pages());
> Â Â Â Â Â Â Â Â Â Â Â Âsc.nr_reclaimed += reclaim_state.reclaimed_slab;
> Â Â Â Â Â Â Â Â} while (sc.nr_reclaimed < nr_pages &&
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âreclaim_state.reclaimed_slab > 0);
>



--
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/