Re: [PATCH v2 4/4] mm, compaction: direct freepage allocation for async direct compaction
From: Joonsoo Kim
Date: Mon Apr 11 2016 - 03:11:15 EST
On Thu, Mar 31, 2016 at 10:50:36AM +0200, Vlastimil Babka wrote:
> The goal of direct compaction is to quickly make a high-order page available
> for the pending allocation. The free page scanner can add significant latency
> when searching for migration targets, although to succeed the compaction, the
> only important limit on the target free pages is that they must not come from
> the same order-aligned block as the migrated pages.
If migration fails, the free pages will remain isolated, and they can
interfere with further compaction success because they don't come from the
previous order-aligned block but can come from the next order-aligned block.
Do you need to free the remaining freelist after the migration attempt fails?
Thanks.
>
> This patch therefore makes direct async compaction allocate freepages directly
> from freelists. Pages that do come from the same block (which we cannot simply
> exclude from the freelist allocation) are put on separate list and released
> only after migration to allow them to merge.
>
> In addition to reduced stall, another advantage is that we split larger free
> pages for migration targets only when smaller pages are depleted, while the
> free scanner can split pages up to (order - 1) as it encounters them. However,
> this approach likely sacrifices some of the long-term anti-fragmentation
> features of a thorough compaction, so we limit the direct allocation approach
> to direct async compaction.
>
> For observational purposes, the patch introduces two new counters to
> /proc/vmstat. compact_free_direct_alloc counts how many pages were allocated
> directly without scanning, and compact_free_direct_miss counts the subset of
> these allocations that were from the wrong range and had to be held on the
> separate list.
>
> Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
> ---
> include/linux/vm_event_item.h | 1 +
> mm/compaction.c | 52 ++++++++++++++++++++++++++++++++++++++++++-
> mm/internal.h | 5 +++++
> mm/page_alloc.c | 27 ++++++++++++++++++++++
> mm/vmstat.c | 2 ++
> 5 files changed, 86 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index ec084321fe09..9ec29406a01e 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -51,6 +51,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
> #endif
> #ifdef CONFIG_COMPACTION
> COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
> + COMPACTFREE_DIRECT_ALLOC, COMPACTFREE_DIRECT_MISS,
> COMPACTISOLATED,
> COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
> KCOMPACTD_WAKE,
> diff --git a/mm/compaction.c b/mm/compaction.c
> index fe94d22d9144..215db281ecaf 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1083,6 +1083,41 @@ static void isolate_freepages(struct compact_control *cc)
> cc->free_pfn = isolate_start_pfn;
> }
>
> +static void isolate_freepages_direct(struct compact_control *cc)
> +{
> + unsigned long nr_pages;
> + unsigned long flags;
> +
> + nr_pages = cc->nr_migratepages - cc->nr_freepages;
> +
> + if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
> + return;
> +
> + while (nr_pages) {
> + struct page *page;
> + unsigned long pfn;
> +
> + page = alloc_pages_zone(cc->zone, 0, MIGRATE_MOVABLE);
> + if (!page)
> + break;
> + pfn = page_to_pfn(page);
> +
> + count_compact_event(COMPACTFREE_DIRECT_ALLOC);
> +
> + /* Is the free page in the block we are migrating from? */
> + if (pfn >> cc->order == (cc->migrate_pfn - 1) >> cc->order) {
> + list_add(&page->lru, &cc->freepages_held);
> + count_compact_event(COMPACTFREE_DIRECT_MISS);
> + } else {
> + list_add(&page->lru, &cc->freepages);
> + cc->nr_freepages++;
> + nr_pages--;
> + }
> + }
> +
> + spin_unlock_irqrestore(&cc->zone->lock, flags);
> +}
> +
> /*
> * This is a migrate-callback that "allocates" freepages by taking pages
> * from the isolated freelists in the block we are migrating to.
> @@ -1099,7 +1134,12 @@ static struct page *compaction_alloc(struct page *migratepage,
> * contention.
> */
> if (list_empty(&cc->freepages)) {
> - if (!cc->contended)
> + if (cc->contended)
> + return NULL;
> +
> + if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC))
> + isolate_freepages_direct(cc);
> + else
> isolate_freepages(cc);
>
> if (list_empty(&cc->freepages))
> @@ -1475,6 +1515,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
> (cc->mode == MIGRATE_ASYNC)) {
> cc->migrate_pfn = block_end_pfn(
> cc->migrate_pfn - 1, cc->order);
> +
> + if (!list_empty(&cc->freepages_held))
> + release_freepages(&cc->freepages_held);
> +
> /* Draining pcplists is useless in this case */
> cc->last_migrated_pfn = 0;
>
> @@ -1495,6 +1539,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
> block_start_pfn(cc->migrate_pfn, cc->order);
>
> if (cc->last_migrated_pfn < current_block_start) {
> + if (!list_empty(&cc->freepages_held))
> + release_freepages(&cc->freepages_held);
> cpu = get_cpu();
> lru_add_drain_cpu(cpu);
> drain_local_pages(zone);
> @@ -1525,6 +1571,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
> if (free_pfn > zone->compact_cached_free_pfn)
> zone->compact_cached_free_pfn = free_pfn;
> }
> + if (!list_empty(&cc->freepages_held))
> + release_freepages(&cc->freepages_held);
>
> trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
> cc->free_pfn, end_pfn, sync, ret);
> @@ -1553,6 +1601,7 @@ static unsigned long compact_zone_order(struct zone *zone, int order,
> };
> INIT_LIST_HEAD(&cc.freepages);
> INIT_LIST_HEAD(&cc.migratepages);
> + INIT_LIST_HEAD(&cc.freepages_held);
>
> ret = compact_zone(zone, &cc);
>
> @@ -1698,6 +1747,7 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
> cc->zone = zone;
> INIT_LIST_HEAD(&cc->freepages);
> INIT_LIST_HEAD(&cc->migratepages);
> + INIT_LIST_HEAD(&cc->freepages_held);
>
> /*
> * When called via /proc/sys/vm/compact_memory
> diff --git a/mm/internal.h b/mm/internal.h
> index b79abb6721cf..a0c0286a9567 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -145,6 +145,8 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
> }
>
> extern int __isolate_free_page(struct page *page, unsigned int order);
> +extern struct page * alloc_pages_zone(struct zone *zone, unsigned int order,
> + int migratetype);
> extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
> unsigned int order);
> extern void prep_compound_page(struct page *page, unsigned int order);
> @@ -165,6 +167,9 @@ extern int user_min_free_kbytes;
> struct compact_control {
> struct list_head freepages; /* List of free pages to migrate to */
> struct list_head migratepages; /* List of pages being migrated */
> + struct list_head freepages_held;/* List of free pages from the block
> + * that's being migrated
> + */
> unsigned long nr_freepages; /* Number of isolated free pages */
> unsigned long nr_migratepages; /* Number of pages to migrate */
> unsigned long free_pfn; /* isolate_freepages search base */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 59de90d5d3a3..3ee83fe02274 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2343,6 +2343,33 @@ int split_free_page(struct page *page)
> }
>
> /*
> + * Like split_free_page, but given the zone, it will grab a free page from
> + * the freelists.
> + */
> +struct page *
> +alloc_pages_zone(struct zone *zone, unsigned int order, int migratetype)
> +{
> + struct page *page;
> + unsigned long watermark;
> +
> + watermark = low_wmark_pages(zone) + (1 << order);
> + if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
> + return NULL;
> +
> + page = __rmqueue(zone, order, migratetype);
> + if (!page)
> + return NULL;
> +
> + __mod_zone_freepage_state(zone, -(1 << order),
> + get_pcppage_migratetype(page));
> +
> + set_page_owner(page, order, __GFP_MOVABLE);
> + set_page_refcounted(page);
> +
> + return page;
> +}
> +
> +/*
> * Allocate a page from the given zone. Use pcplists for order-0 allocations.
> */
> static inline
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 5e4300482897..9e07d11afa0d 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -822,6 +822,8 @@ const char * const vmstat_text[] = {
> #ifdef CONFIG_COMPACTION
> "compact_migrate_scanned",
> "compact_free_scanned",
> + "compact_free_direct_alloc",
> + "compact_free_direct_miss",
> "compact_isolated",
> "compact_stall",
> "compact_fail",
> --
> 2.7.3
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@xxxxxxxxxx. For more info on Linux MM,
> see: http://www.linux-mm.org/.
> Don't email: <a href="mailto:dont@xxxxxxxxx">email@xxxxxxxxx</a>