Re: [RFC 1/3] mm, oom: refactor oom detection

From: Hillf Danton
Date: Fri Oct 30 2015 - 00:16:19 EST


> +/*
> + * Number of backoff steps for potentially reclaimable pages if the direct reclaim
> + * cannot make any progress. Each step will reduce 1/MAX_STALL_BACKOFF of the
> + * reclaimable memory.
> + */
> +#define MAX_STALL_BACKOFF 16
> +
> static inline struct page *
> __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> struct alloc_context *ac)
> @@ -2984,6 +2991,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> enum migrate_mode migration_mode = MIGRATE_ASYNC;
> bool deferred_compaction = false;
> int contended_compaction = COMPACT_CONTENDED_NONE;
> + struct zone *zone;
> + struct zoneref *z;
> + int stall_backoff = 0;
>
> /*
> * In the slowpath, we sanity check order to avoid ever trying to
> @@ -3135,13 +3145,56 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> if (gfp_mask & __GFP_NORETRY)
> goto noretry;
>
> - /* Keep reclaiming pages as long as there is reasonable progress */
> + /*
> + * Do not retry high order allocations unless they are __GFP_REPEAT
> + * and even then do not retry endlessly.
> + */
> pages_reclaimed += did_some_progress;
> - if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
> - ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
> - /* Wait for some write requests to complete then retry */
> - wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50);
> - goto retry;
> + if (order > PAGE_ALLOC_COSTLY_ORDER) {
> + if (!(gfp_mask & __GFP_REPEAT) || pages_reclaimed >= (1<<order))
> + goto noretry;
> +
> + if (did_some_progress)
> + goto retry;
> + }
> +
> + /*
> + * Be optimistic and consider all pages on reclaimable LRUs as usable
> + * but make sure we converge to OOM if we cannot make any progress after
> + * multiple consecutive failed attempts.
> + */
> + if (did_some_progress)
> + stall_backoff = 0;
> + else
> + stall_backoff = min(stall_backoff+1, MAX_STALL_BACKOFF);
> +
> + /*
> + * Keep reclaiming pages while there is a chance this will lead somewhere.
> + * If none of the target zones can satisfy our allocation request even
> + * if all reclaimable pages are considered then we are screwed and have
> + * to go OOM.
> + */
> + for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, ac->nodemask) {
> + unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
> + unsigned long reclaimable;
> + unsigned long target;
> +
> + reclaimable = zone_reclaimable_pages(zone) +
> + zone_page_state(zone, NR_ISOLATED_FILE) +
> + zone_page_state(zone, NR_ISOLATED_ANON);
> + target = reclaimable;
> + target -= stall_backoff * (1 + target/MAX_STALL_BACKOFF);

target = reclaimable - stall_backoff * (1 + target/MAX_STALL_BACKOFF);
= reclaimable - stall_backoff - stall_backoff * (target/MAX_STALL_BACKOFF);

then the first stall_backoff looks unreasonable.
I guess you mean
target = reclaimable - target * (stall_backoff/MAX_STALL_BACKOFF);
= reclaimable - stall_backoff * (target/MAX_STALL_BACKOFF);

> + target += free;
> +
> + /*
> + * Would the allocation succeed if we reclaimed the whole target?
> + */
> + if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
> + ac->high_zoneidx, alloc_flags, target)) {
> + /* Wait for some write requests to complete then retry */
> + wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
> + goto retry;
> + }
> }
>
[...]
/*
> @@ -2734,10 +2730,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
> goto retry;
> }
>
> - /* Any of the zones still reclaimable? Don't OOM. */
> - if (zones_reclaimable)
> - return 1;
> -

It looks like the cleanup of the now-unused zones_reclaimable is still left to do.
> return 0;
> }
>
> --
> 2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/