[PATCH] mm/z3fold.c: Fix race between migration and destruction

From: Henry Burns
Date: Fri Aug 09 2019 - 12:46:51 EST


In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq).
However, we have no guarantee that migration isn't happening in the
background at that time.

Migration directly calls queue_work_on(pool->compact_wq), if destruction
wins that race we are using a destroyed workqueue.

Signed-off-by: Henry Burns <henryburns@xxxxxxxxxx>
---
mm/z3fold.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 78447cecfffa..e136d97ce56e 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -40,6 +40,7 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/wait.h>
#include <linux/zpool.h>

/*
@@ -161,8 +162,10 @@ struct z3fold_pool {
const struct zpool_ops *zpool_ops;
struct workqueue_struct *compact_wq;
struct workqueue_struct *release_wq;
+ struct wait_queue_head isolate_wait;
struct work_struct work;
struct inode *inode;
+ int isolated_pages;
};

/*
@@ -772,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
goto out_c;
spin_lock_init(&pool->lock);
spin_lock_init(&pool->stale_lock);
+ init_waitqueue_head(&pool->isolate_wait);
pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
if (!pool->unbuddied)
goto out_pool;
@@ -811,6 +815,15 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
return NULL;
}

+static bool pool_isolated_are_drained(struct z3fold_pool *pool)
+{
+ bool ret;
+
+ spin_lock(&pool->lock);
+ ret = pool->isolated_pages == 0;
+ spin_unlock(&pool->lock);
+ return ret;
+}
/**
* z3fold_destroy_pool() - destroys an existing z3fold pool
* @pool: the z3fold pool to be destroyed
@@ -821,6 +834,13 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
kmem_cache_destroy(pool->c_handle);

+ /*
+ * We need to ensure that no pages are being migrated while we destroy
+ * these workqueues, as migration can queue work on either of the
+ * workqueues.
+ */
+ wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
+
/*
* We need to destroy pool->compact_wq before pool->release_wq,
* as any pending work on pool->compact_wq will call
@@ -1317,6 +1337,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
return atomic64_read(&pool->pages_nr);
}

+/*
+ * z3fold_dec_isolated() expects to be called while pool->lock is held.
+ */
+static void z3fold_dec_isolated(struct z3fold_pool *pool)
+{
+ assert_spin_locked(&pool->lock);
+ VM_BUG_ON(pool->isolated_pages <= 0);
+ pool->isolated_pages--;
+
+ /*
+ * If we have no more isolated pages, we have to see if
+ * z3fold_destroy_pool() is waiting for a signal.
+ */
+ if (pool->isolated_pages == 0 && waitqueue_active(&pool->isolate_wait))
+ wake_up_all(&pool->isolate_wait);
+}
+
+static void z3fold_inc_isolated(struct z3fold_pool *pool)
+{
+ pool->isolated_pages++;
+}
+
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
struct z3fold_header *zhdr;
@@ -1343,6 +1385,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
spin_lock(&pool->lock);
if (!list_empty(&page->lru))
list_del(&page->lru);
+ z3fold_inc_isolated(pool);
spin_unlock(&pool->lock);
z3fold_page_unlock(zhdr);
return true;
@@ -1417,6 +1460,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa

queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

+ spin_lock(&pool->lock);
+ z3fold_dec_isolated(pool);
+ spin_unlock(&pool->lock);
+
page_mapcount_reset(page);
put_page(page);
return 0;
@@ -1436,10 +1483,14 @@ static void z3fold_page_putback(struct page *page)
INIT_LIST_HEAD(&page->lru);
if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
atomic64_dec(&pool->pages_nr);
+ spin_lock(&pool->lock);
+ z3fold_dec_isolated(pool);
+ spin_unlock(&pool->lock);
return;
}
spin_lock(&pool->lock);
list_add(&page->lru, &pool->lru);
+ z3fold_dec_isolated(pool);
spin_unlock(&pool->lock);
z3fold_page_unlock(zhdr);
}
--
2.22.0.770.g0f2c4a37fd-goog