Re: [RFC][PATCH 2/8] mm/migrate: Defer allocating new page until needed

From: Greg Thelen
Date: Wed Jul 01 2020 - 04:47:15 EST


Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> wrote:

> From: Keith Busch <kbusch@xxxxxxxxxx>
>
> Migrating pages had been allocating the new page before it was actually
> needed. Subsequent operations may still fail, and the error paths then
> have to clean up the newly allocated page even though it was never used.
>
> Defer allocating the page until we are actually ready to make use of
> it, after locking the original page. This simplifies error handling,
> but should not result in any functional change. This is just
> refactoring page migration so the main part can more easily be reused
> by other code.
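
To make the reordering concrete, the before/after control flow is
roughly this (a simplified sketch based on the diff below, not the
literal code):

        /* Before: unmap_and_move() allocated up front */
        newpage = get_new_page(page, private);
        if (!newpage)
                return -ENOMEM;
        rc = __unmap_and_move(page, newpage, force, mode);
        /* ...and every later failure path still had to free newpage */

        /* After: the caller locks the source first; the callee
         * allocates only once migration is actually going ahead,
         * and frees newpage on its own error paths.
         */
        lock_page(page);        /* trylock/writeback checks elided */
        rc = __unmap_and_move(get_new_page, put_new_page, private,
                              page, mode, reason);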

Is there any concern that the src page is now held PG_locked across the
dst page allocation, which might wander into
reclaim/cond_resched/oom_kill? I don't have a deadlock in mind; I'm
just wondering about the additional latency imposed on unrelated
threads that want to access the src page.
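
Concretely, the window I mean looks like this (abbreviated from the
diff below):

        lock_page(page);                        /* src now PG_locked */
        ...
        newpage = get_new_page(page, private);  /* can enter direct reclaim,
                                                 * cond_resched(), or trip
                                                 * the OOM killer */
        ...
        unlock_page(page);

Any thread doing lock_page()/wait_on_page_locked() on the src page now
waits for however long that allocation takes.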

> #Signed-off-by: Keith Busch <keith.busch@xxxxxxxxx>

Is the commented-out Signed-off-by intentional? The same applies to
later patches.

> Signed-off-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
> Cc: Keith Busch <kbusch@xxxxxxxxxx>
> Cc: Yang Shi <yang.shi@xxxxxxxxxxxxxxxxx>
> Cc: David Rientjes <rientjes@xxxxxxxxxx>
> Cc: Huang Ying <ying.huang@xxxxxxxxx>
> Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
> ---
>
> b/mm/migrate.c | 148 ++++++++++++++++++++++++++++-----------------------------
> 1 file changed, 75 insertions(+), 73 deletions(-)
>
> diff -puN mm/migrate.c~0007-mm-migrate-Defer-allocating-new-page-until-needed mm/migrate.c
> --- a/mm/migrate.c~0007-mm-migrate-Defer-allocating-new-page-until-needed 2020-06-29 16:34:37.896312607 -0700
> +++ b/mm/migrate.c 2020-06-29 16:34:37.900312607 -0700
> @@ -1014,56 +1014,17 @@ out:
> return rc;
> }
>
> -static int __unmap_and_move(struct page *page, struct page *newpage,
> - int force, enum migrate_mode mode)
> +static int __unmap_and_move(new_page_t get_new_page,
> + free_page_t put_new_page,
> + unsigned long private, struct page *page,
> + enum migrate_mode mode,
> + enum migrate_reason reason)
> {
> int rc = -EAGAIN;
> int page_was_mapped = 0;
> struct anon_vma *anon_vma = NULL;
> bool is_lru = !__PageMovable(page);
> -
> - if (!trylock_page(page)) {
> - if (!force || mode == MIGRATE_ASYNC)
> - goto out;
> -
> - /*
> - * It's not safe for direct compaction to call lock_page.
> - * For example, during page readahead pages are added locked
> - * to the LRU. Later, when the IO completes the pages are
> - * marked uptodate and unlocked. However, the queueing
> - * could be merging multiple pages for one bio (e.g.
> - * mpage_readpages). If an allocation happens for the
> - * second or third page, the process can end up locking
> - * the same page twice and deadlocking. Rather than
> - * trying to be clever about what pages can be locked,
> - * avoid the use of lock_page for direct compaction
> - * altogether.
> - */
> - if (current->flags & PF_MEMALLOC)
> - goto out;
> -
> - lock_page(page);
> - }
> -
> - if (PageWriteback(page)) {
> - /*
> - * Only in the case of a full synchronous migration is it
> - * necessary to wait for PageWriteback. In the async case,
> - * the retry loop is too short and in the sync-light case,
> - * the overhead of stalling is too much
> - */
> - switch (mode) {
> - case MIGRATE_SYNC:
> - case MIGRATE_SYNC_NO_COPY:
> - break;
> - default:
> - rc = -EBUSY;
> - goto out_unlock;
> - }
> - if (!force)
> - goto out_unlock;
> - wait_on_page_writeback(page);
> - }
> + struct page *newpage;
>
> /*
> * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
> @@ -1082,6 +1043,12 @@ static int __unmap_and_move(struct page
> if (PageAnon(page) && !PageKsm(page))
> anon_vma = page_get_anon_vma(page);
>
> + newpage = get_new_page(page, private);
> + if (!newpage) {
> + rc = -ENOMEM;
> + goto out;
> + }
> +
> /*
> * Block others from accessing the new page when we get around to
> * establishing additional references. We are usually the only one
> @@ -1091,11 +1058,11 @@ static int __unmap_and_move(struct page
> * This is much like races on refcount of oldpage: just don't BUG().
> */
> if (unlikely(!trylock_page(newpage)))
> - goto out_unlock;
> + goto out_put;
>
> if (unlikely(!is_lru)) {
> rc = move_to_new_page(newpage, page, mode);
> - goto out_unlock_both;
> + goto out_unlock;
> }
>
> /*
> @@ -1114,7 +1081,7 @@ static int __unmap_and_move(struct page
> VM_BUG_ON_PAGE(PageAnon(page), page);
> if (page_has_private(page)) {
> try_to_free_buffers(page);
> - goto out_unlock_both;
> + goto out_unlock;
> }
> } else if (page_mapped(page)) {
> /* Establish migration ptes */
> @@ -1131,15 +1098,9 @@ static int __unmap_and_move(struct page
> if (page_was_mapped)
> remove_migration_ptes(page,
> rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
> -
> -out_unlock_both:
> - unlock_page(newpage);
> out_unlock:
> - /* Drop an anon_vma reference if we took one */
> - if (anon_vma)
> - put_anon_vma(anon_vma);
> - unlock_page(page);
> -out:
> + unlock_page(newpage);
> +out_put:
> /*
> * If migration is successful, decrease refcount of the newpage
> * which will not free the page because new page owner increased
> @@ -1150,12 +1111,20 @@ out:
> * state.
> */
> if (rc == MIGRATEPAGE_SUCCESS) {
> + set_page_owner_migrate_reason(newpage, reason);
> if (unlikely(!is_lru))
> put_page(newpage);
> else
> putback_lru_page(newpage);
> + } else if (put_new_page) {
> + put_new_page(newpage, private);
> + } else {
> + put_page(newpage);
> }
> -
> +out:
> + /* Drop an anon_vma reference if we took one */
> + if (anon_vma)
> + put_anon_vma(anon_vma);
> return rc;
> }
>
> @@ -1203,8 +1172,7 @@ static ICE_noinline int unmap_and_move(n
> int force, enum migrate_mode mode,
> enum migrate_reason reason)
> {
> - int rc = MIGRATEPAGE_SUCCESS;
> - struct page *newpage = NULL;
> + int rc = -EAGAIN;
>
> if (!thp_migration_supported() && PageTransHuge(page))
> return -ENOMEM;
> @@ -1219,17 +1187,57 @@ static ICE_noinline int unmap_and_move(n
> __ClearPageIsolated(page);
> unlock_page(page);
> }
> + rc = MIGRATEPAGE_SUCCESS;
> goto out;
> }
>
> - newpage = get_new_page(page, private);
> - if (!newpage)
> - return -ENOMEM;
> + if (!trylock_page(page)) {
> + if (!force || mode == MIGRATE_ASYNC)
> + return rc;
>
> - rc = __unmap_and_move(page, newpage, force, mode);
> - if (rc == MIGRATEPAGE_SUCCESS)
> - set_page_owner_migrate_reason(newpage, reason);
> + /*
> + * It's not safe for direct compaction to call lock_page.
> + * For example, during page readahead pages are added locked
> + * to the LRU. Later, when the IO completes the pages are
> + * marked uptodate and unlocked. However, the queueing
> + * could be merging multiple pages for one bio (e.g.
> + * mpage_readpages). If an allocation happens for the
> + * second or third page, the process can end up locking
> + * the same page twice and deadlocking. Rather than
> + * trying to be clever about what pages can be locked,
> + * avoid the use of lock_page for direct compaction
> + * altogether.
> + */
> + if (current->flags & PF_MEMALLOC)
> + return rc;
> +
> + lock_page(page);
> + }
> +
> + if (PageWriteback(page)) {
> + /*
> + * Only in the case of a full synchronous migration is it
> + * necessary to wait for PageWriteback. In the async case,
> + * the retry loop is too short and in the sync-light case,
> + * the overhead of stalling is too much
> + */
> + switch (mode) {
> + case MIGRATE_SYNC:
> + case MIGRATE_SYNC_NO_COPY:
> + break;
> + default:
> + rc = -EBUSY;
> + goto out_unlock;
> + }
> + if (!force)
> + goto out_unlock;
> + wait_on_page_writeback(page);
> + }
> + rc = __unmap_and_move(get_new_page, put_new_page, private,
> + page, mode, reason);
>
> +out_unlock:
> + unlock_page(page);
> out:
> if (rc != -EAGAIN) {
> /*
> @@ -1269,9 +1277,8 @@ out:
> if (rc != -EAGAIN) {
> if (likely(!__PageMovable(page))) {
> putback_lru_page(page);
> - goto put_new;
> + goto done;
> }
> -
> lock_page(page);
> if (PageMovable(page))
> putback_movable_page(page);
> @@ -1280,13 +1287,8 @@ out:
> unlock_page(page);
> put_page(page);
> }
> -put_new:
> - if (put_new_page)
> - put_new_page(newpage, private);
> - else
> - put_page(newpage);
> }
> -
> +done:
> return rc;
> }
>
> _