On Mon, Oct 14, 2019 at 11:49 PM Hui Zhu <teawater@xxxxxxxxx> wrote:
This commit lets zswap treat a THP as a run of contiguous normal pages
in zswap_frontswap_store.
Each subpage is stored in its own "zswap_entry", and these entries are
then inserted into the "zswap_tree" together.

Why does zswap need to carry the added complexity of converting a THP
into separate normal-sized pages? That should be done higher up in
the swap layer.
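
For comparison, the reclaim path already knows how to split a THP it
cannot swap out as a whole. A rough, untested sketch of what doing it
there could look like (split_huge_page_to_list() is the existing
helper; the surrounding label is illustrative, not actual vmscan.c
code):

	/*
	 * Hypothetical placement in the reclaim path: split the THP
	 * before it ever reaches frontswap, so that
	 * zswap_frontswap_store() keeps its single-page contract.
	 */
	if (PageTransHuge(page)) {
		/* returns 0 on success; tail pages join page_list */
		if (split_huge_page_to_list(page, page_list))
			goto keep_locked;	/* could not split */
	}
	/* from here on, zswap only ever sees base pages */

With the split done there, the per-subpage allocation and rollback
handling added below would not be needed in zswap at all.
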
Signed-off-by: Hui Zhu <teawaterz@xxxxxxxxxxxxxxxxx>
---
mm/zswap.c | 170 +++++++++++++++++++++++++++++++++++++++----------------------
1 file changed, 109 insertions(+), 61 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index 46a3223..36aa10d 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -316,11 +316,7 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
}
}
-/*
- * Carries out the common pattern of freeing and entry's zpool allocation,
- * freeing the entry itself, and decrementing the number of stored pages.
- */
-static void zswap_free_entry(struct zswap_entry *entry)
+static void zswap_free_entry_1(struct zswap_entry *entry)
{
if (!entry->length)
atomic_dec(&zswap_same_filled_pages);
@@ -329,6 +325,15 @@ static void zswap_free_entry(struct zswap_entry *entry)
zswap_pool_put(entry->pool);
}
zswap_entry_cache_free(entry);
+}
+
+/*
+ * Carries out the common pattern of freeing an entry's zpool allocation,
+ * freeing the entry itself, and decrementing the number of stored pages.
+ */
+static void zswap_free_entry(struct zswap_entry *entry)
+{
+ zswap_free_entry_1(entry);
atomic_dec(&zswap_stored_pages);
zswap_update_total_size();
}
@@ -980,15 +985,11 @@ static void zswap_fill_page(void *ptr, unsigned long value)
memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}
-/*********************************
-* frontswap hooks
-**********************************/
-/* attempts to compress and store an single page */
-static int zswap_frontswap_store(unsigned type, pgoff_t offset,
- struct page *page)
+static int zswap_frontswap_store_1(unsigned type, pgoff_t offset,
+ struct page *page,
+ struct zswap_entry **entry_pointer)
{
- struct zswap_tree *tree = zswap_trees[type];
- struct zswap_entry *entry, *dupentry;
+ struct zswap_entry *entry;
struct crypto_comp *tfm;
int ret;
unsigned int hlen, dlen = PAGE_SIZE;
@@ -998,36 +999,6 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
gfp_t gfp;
- /* THP isn't supported */
- if (PageTransHuge(page)) {
- ret = -EINVAL;
- goto reject;
- }
-
- if (!zswap_enabled || !tree) {
- ret = -ENODEV;
- goto reject;
- }
-
- /* reclaim space if needed */
- if (zswap_is_full()) {
- zswap_pool_limit_hit++;
- if (zswap_shrink()) {
- zswap_reject_reclaim_fail++;
- ret = -ENOMEM;
- goto reject;
- }
-
- /* A second zswap_is_full() check after
- * zswap_shrink() to make sure it's now
- * under the max_pool_percent
- */
- if (zswap_is_full()) {
- ret = -ENOMEM;
- goto reject;
- }
- }
-
/* allocate entry */
entry = zswap_entry_cache_alloc(GFP_KERNEL);
if (!entry) {
@@ -1035,6 +1006,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
ret = -ENOMEM;
goto reject;
}
+ *entry_pointer = entry;
if (zswap_same_filled_pages_enabled) {
src = kmap_atomic(page);
@@ -1044,7 +1016,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
entry->length = 0;
entry->value = value;
atomic_inc(&zswap_same_filled_pages);
- goto insert_entry;
+ goto out;
}
kunmap_atomic(src);
}
@@ -1093,31 +1065,105 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
entry->handle = handle;
entry->length = dlen;
-insert_entry:
+out:
+ return 0;
+
+put_dstmem:
+ put_cpu_var(zswap_dstmem);
+ zswap_pool_put(entry->pool);
+freepage:
+ zswap_entry_cache_free(entry);
+reject:
+ return ret;
+}
+
+/*********************************
+* frontswap hooks
+**********************************/
+/* attempts to compress and store a single page */
+static int zswap_frontswap_store(unsigned type, pgoff_t offset,
+ struct page *page)
+{
+ struct zswap_tree *tree = zswap_trees[type];
+ struct zswap_entry **entries = NULL, *dupentry;
+ struct zswap_entry *single_entry[1];
+ int ret;
+ int i, nr;
+
+ if (!zswap_enabled || !tree) {
+ ret = -ENODEV;
+ goto reject;
+ }
+
+ /* reclaim space if needed */
+ if (zswap_is_full()) {
+ zswap_pool_limit_hit++;
+ if (zswap_shrink()) {
+ zswap_reject_reclaim_fail++;
+ ret = -ENOMEM;
+ goto reject;
+ }
+
+ /* A second zswap_is_full() check after
+ * zswap_shrink() to make sure it's now
+ * under the max_pool_percent
+ */
+ if (zswap_is_full()) {
+ ret = -ENOMEM;
+ goto reject;
+ }
+ }
+
+ nr = hpage_nr_pages(page);
+
+ if (unlikely(nr > 1)) {
+ entries = kvmalloc(sizeof(struct zswap_entry *) * nr,
+ GFP_KERNEL);
+ if (!entries) {
+ ret = -ENOMEM;
+ goto reject;
+ }
+ } else
+ entries = single_entry;
+
+ for (i = 0; i < nr; i++) {
+ ret = zswap_frontswap_store_1(type, offset + i, page + i,
+ &entries[i]);
+ if (ret)
+ goto freepage;
+ }
+
/* map */
spin_lock(&tree->lock);
- do {
- ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
- if (ret == -EEXIST) {
- zswap_duplicate_entry++;
- /* remove from rbtree */
- zswap_rb_erase(&tree->rbroot, dupentry);
- zswap_entry_put(tree, dupentry);
- }
- } while (ret == -EEXIST);
+ for (i = 0; i < nr; i++) {
+ do {
+ ret = zswap_rb_insert(&tree->rbroot, entries[i],
+ &dupentry);
+ if (ret == -EEXIST) {
+ zswap_duplicate_entry++;
+ /* remove from rbtree */
+ zswap_rb_erase(&tree->rbroot, dupentry);
+ zswap_entry_put(tree, dupentry);
+ }
+ } while (ret == -EEXIST);
+ }
spin_unlock(&tree->lock);
/* update stats */
- atomic_inc(&zswap_stored_pages);
+ atomic_add(nr, &zswap_stored_pages);
zswap_update_total_size();
- return 0;
-
-put_dstmem:
- put_cpu_var(zswap_dstmem);
- zswap_pool_put(entry->pool);
+ ret = 0;
freepage:
- zswap_entry_cache_free(entry);
+ if (unlikely(nr > 1)) {
+ if (ret) {
+ int j;
+
+ for (j = 0; j < i; j++)
+ zswap_free_entry_1(entries[j]);
+ }
+ kvfree(entries);
+ }
reject:
return ret;
}
@@ -1136,6 +1182,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
unsigned int dlen;
int ret;
+ BUG_ON(PageTransHuge(page));
+
/* find */
spin_lock(&tree->lock);
entry = zswap_entry_find_get(&tree->rbroot, offset);
--
2.7.4