Re: Crashes/hung tasks with z3pool under memory pressure

From: Guenter Roeck
Date: Wed Apr 18 2018 - 12:08:05 EST


On Wed, Apr 18, 2018 at 10:13:17AM +0200, Vitaly Wool wrote:
> Den tis 17 apr. 2018 kl 18:35 skrev Guenter Roeck <linux@xxxxxxxxxxxx>:
>
> <snip>
>
> > Getting better; the log is much less noisy. Unfortunately, there are still
> > locking problems, resulting in a hung task. I copied the log message to [1].
> > This is with [2] applied on top of v4.17-rc1.
>
> Now this version (this is a full patch to be applied instead of the previous one) should have the above problem resolved too:
>

Excellent - I cannot reproduce the problem with this patch
applied.

Guenter

> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index c0bca6153b95..901c0b07cbda 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -144,7 +144,8 @@ enum z3fold_page_flags {
> PAGE_HEADLESS = 0,
> MIDDLE_CHUNK_MAPPED,
> NEEDS_COMPACTING,
> - PAGE_STALE
> + PAGE_STALE,
> + UNDER_RECLAIM
> };
>
> /*****************
> @@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
> clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
> clear_bit(NEEDS_COMPACTING, &page->private);
> clear_bit(PAGE_STALE, &page->private);
> + clear_bit(UNDER_RECLAIM, &page->private);
>
> spin_lock_init(&zhdr->page_lock);
> kref_init(&zhdr->refcount);
> @@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
> atomic64_dec(&pool->pages_nr);
> return;
> }
> + if (test_bit(UNDER_RECLAIM, &page->private)) {
> + z3fold_page_unlock(zhdr);
> + return;
> + }
> if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
> z3fold_page_unlock(zhdr);
> return;
> @@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> kref_get(&zhdr->refcount);
> list_del_init(&zhdr->buddy);
> zhdr->cpu = -1;
> + set_bit(UNDER_RECLAIM, &page->private);
> + break;
> }
>
> list_del_init(&page->lru);
> @@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> goto next;
> }
> next:
> - spin_lock(&pool->lock);
> if (test_bit(PAGE_HEADLESS, &page->private)) {
> if (ret == 0) {
> - spin_unlock(&pool->lock);
> free_z3fold_page(page);
> return 0;
> }
> - } else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
> - atomic64_dec(&pool->pages_nr);
> + spin_lock(&pool->lock);
> + list_add(&page->lru, &pool->lru);
> + spin_unlock(&pool->lock);
> + } else {
> + z3fold_page_lock(zhdr);
> + clear_bit(UNDER_RECLAIM, &page->private);
> + if (kref_put(&zhdr->refcount,
> + release_z3fold_page_locked)) {
> + atomic64_dec(&pool->pages_nr);
> + return 0;
> + }
> + /*
> + * if we are here, the page is still not completely
> + * free. Take the global pool lock then to be able

Is the "then" in the line above extra?

> + * to add it back to the lru list
> + */
> + spin_lock(&pool->lock);
> + list_add(&page->lru, &pool->lru);
> spin_unlock(&pool->lock);
> - return 0;
> + z3fold_page_unlock(zhdr);
> }
>
> - /*
> - * Add to the beginning of LRU.
> - * Pool lock has to be kept here to ensure the page has
> - * not already been released
> - */
> - list_add(&page->lru, &pool->lru);
> + /* We started off locked so we need to lock the pool back */
> + spin_lock(&pool->lock);
> }
> spin_unlock(&pool->lock);
> return -EAGAIN;