[PATCH 1/2] swap: make swapcache_free aware of page size

From: Minchan Kim
Date: Fri Apr 28 2017 - 02:04:14 EST


Now, get_swap_page takes struct page and allocates swap space according
to the page size (i.e., normal or THP), so it would be clearer to take
struct page in swapcache_free, which is the counterpart of
get_swap_page, without needing an if-else statement on the caller side.

Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---
include/linux/swap.h | 4 ++--
mm/shmem.c | 2 +-
mm/swap_state.c | 13 +++----------
mm/swapfile.c | 12 ++++++++++--
mm/vmscan.c | 2 +-
5 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index b60fea3748f8..16c8d2392ddd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -400,7 +400,7 @@ extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
-extern void swapcache_free(swp_entry_t);
+extern void swapcache_free(struct page *page, swp_entry_t);
extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern int free_swap_and_cache(swp_entry_t);
extern int swap_type_of(dev_t, sector_t, struct block_device **);
@@ -459,7 +459,7 @@ static inline void swap_free(swp_entry_t swp)
{
}

-static inline void swapcache_free(swp_entry_t swp)
+static inline void swapcache_free(struct page *page, swp_entry_t swp)
{
}

diff --git a/mm/shmem.c b/mm/shmem.c
index 29948d7da172..ab1802664e97 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1326,7 +1326,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)

mutex_unlock(&shmem_swaplist_mutex);
free_swap:
- swapcache_free(swap);
+ swapcache_free(page, swap);
redirty:
set_page_dirty(page);
if (wbc->for_reclaim)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 16ff89d058f4..4af44fd4142e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -231,10 +231,7 @@ int add_to_swap(struct page *page, struct list_head *list)
return 1;

fail_free:
- if (PageTransHuge(page))
- swapcache_free_cluster(entry);
- else
- swapcache_free(entry);
+ swapcache_free(page, entry);
fail:
if (PageTransHuge(page) && !split_huge_page_to_list(page, list))
goto retry;
@@ -259,11 +256,7 @@ void delete_from_swap_cache(struct page *page)
__delete_from_swap_cache(page);
spin_unlock_irq(&address_space->tree_lock);

- if (PageTransHuge(page))
- swapcache_free_cluster(entry);
- else
- swapcache_free(entry);
-
+ swapcache_free(page, entry);
page_ref_sub(page, hpage_nr_pages(page));
}

@@ -415,7 +408,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry);
+ swapcache_free(new_page, entry);
} while (err != -ENOMEM);

if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 596306272059..9496cc3e955a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1144,7 +1144,7 @@ void swap_free(swp_entry_t entry)
/*
* Called after dropping swapcache to decrease refcnt to swap entries.
*/
-void swapcache_free(swp_entry_t entry)
+void __swapcache_free(swp_entry_t entry)
{
struct swap_info_struct *p;

@@ -1156,7 +1156,7 @@ void swapcache_free(swp_entry_t entry)
}

#ifdef CONFIG_THP_SWAP
-void swapcache_free_cluster(swp_entry_t entry)
+void __swapcache_free_cluster(swp_entry_t entry)
{
unsigned long offset = swp_offset(entry);
unsigned long idx = offset / SWAPFILE_CLUSTER;
@@ -1182,6 +1182,14 @@ void swapcache_free_cluster(swp_entry_t entry)
}
#endif /* CONFIG_THP_SWAP */

+void swapcache_free(struct page *page, swp_entry_t entry)
+{
+ if (!PageTransHuge(page))
+ __swapcache_free(entry);
+ else
+ __swapcache_free_cluster(entry);
+}
+
static int swp_entry_cmp(const void *ent1, const void *ent2)
{
const swp_entry_t *e1 = ent1, *e2 = ent2;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5ebf468c5429..0f8ca3d1761d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -708,7 +708,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
mem_cgroup_swapout(page, swap);
__delete_from_swap_cache(page);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
- swapcache_free(swap);
+ swapcache_free(page, swap);
} else {
void (*freepage)(struct page *);
void *shadow = NULL;
--
2.7.4


On Mon, May 01, 2017 at 12:44:30PM +0200, Johannes Weiner wrote:
> On Fri, Apr 28, 2017 at 05:40:44PM +0900, Minchan Kim wrote:
> > However, get_swap_page is ugly now. The caller should take care of
> > failure and should retry after split. I hope get_swap_page includes
> > split and retry logic in itself without relying on the caller.
>
> I think this makes the interface terrible. It's an allocation function
> to which you pass a reference object for size - and if the allocation
> doesn't succeed it'll split your reference object to make it fit?
>
> That's a nasty side effect for this function to have.

It does make sense to me.

From that point of view, the retry logic in add_to_swap is awkward to me, too.
add_to_swap is a kind of allocation function (for a swap slot and swap cache),
so I think it would be better to move the retry logic to vmscan.c.

What do you think about it?