[PATCH v1 2/5] s390/uv: convert gmap_make_secure() to work on folios

From: David Hildenbrand
Date: Thu Apr 04 2024 - 12:38:32 EST


We have various goals that require gmap_make_secure() to only work on
folios. We want to limit the use of page_mapcount() to the places where it
is absolutely necessary, we want to avoid using page flags of tail
pages, and we want to remove page_has_private().

So, let's convert gmap_make_secure() to folios. While s390x makes sure
to never have PMD-mapped THP in processes that use KVM -- by remapping
them using PTEs in thp_split_walk_pmd_entry()->split_huge_pmd() -- we might
still find PTE-mapped THPs and could end up working on tail pages of
such large folios for now.

To handle that cleanly, let's simply split any PTE-mapped large folio,
so we can be sure that we are always working with small folios and never
on tail pages.
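
To illustrate, the flow added below boils down to the following
(a condensed sketch, not the literal code; the locking context and
error propagation are as in the actual hunks):

	folio = page_folio(pte_page(*ptep));
	if (folio_test_large(folio)) {
		rc = -E2BIG;		/* signal that a split is needed */
		folio_get(folio);	/* keep the folio alive once the PTL is dropped */
	}
	...
	if (rc == -E2BIG) {
		folio_lock(folio);
		rc = split_folio(folio);	/* fails with -EBUSY on unexpected references */
		folio_unlock(folio);
		folio_put(folio);
	}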

There is no real change in behavior: splitting fails on unexpected folio
references, just as the existing code already does when it tries to
freeze the folio refcount.
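
For reference, the refcount freeze that make_folio_secure() performs is,
conceptually, the following (an illustrative helper mirroring
page_ref_freeze() from include/linux/page_ref.h; it is not part of this
patch):

	static inline bool folio_freeze_sketch(struct folio *folio, int expected)
	{
		/*
		 * Succeed only if the refcount is exactly "expected"; any
		 * unexpected reference makes this fail -- the same condition
		 * under which split_folio() fails with -EBUSY.
		 */
		return atomic_cmpxchg(&folio->page._refcount, expected, 0) == expected;
	}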

Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
arch/s390/include/asm/page.h | 1 +
arch/s390/kernel/uv.c | 66 ++++++++++++++++++++++--------------
2 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 9381879f7ecf..54d015bcd8e3 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -215,6 +215,7 @@ static inline unsigned long __phys_addr(unsigned long x, bool is_31bit)

#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys))
#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+#define folio_to_phys(folio) pfn_to_phys(folio_pfn(folio))

static inline void *pfn_to_virt(unsigned long pfn)
{
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 7401838b960b..adcbd4b13035 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -181,36 +181,36 @@ int uv_convert_owned_from_secure(unsigned long paddr)
}

/*
- * Calculate the expected ref_count for a page that would otherwise have no
+ * Calculate the expected ref_count for a folio that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
* the kernel, but with some slight modifications. We know that a secure
- * page can not be a huge page for example.
+ * folio can only be a small folio, for example.
*/
-static int expected_page_refs(struct page *page)
+static int expected_folio_refs(struct folio *folio)
{
int res;

- res = page_mapcount(page);
- if (PageSwapCache(page)) {
+ res = folio_mapcount(folio);
+ if (folio_test_swapcache(folio)) {
res++;
- } else if (page_mapping(page)) {
+ } else if (folio_mapping(folio)) {
res++;
- if (page_has_private(page))
+ if (folio_has_private(folio))
res++;
}
return res;
}

-static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
+static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;

- if (PageWriteback(page))
+ if (folio_test_writeback(folio))
return -EAGAIN;
- expected = expected_page_refs(page);
- if (!page_ref_freeze(page, expected))
+ expected = expected_folio_refs(folio);
+ if (!folio_ref_freeze(folio, expected))
return -EBUSY;
- set_bit(PG_arch_1, &page->flags);
+ set_bit(PG_arch_1, &folio->flags);
/*
* If the UVC does not succeed or fail immediately, we don't want to
* loop for long, or we might get stall notifications.
@@ -220,9 +220,9 @@ static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
* -EAGAIN and we let the callers deal with it.
*/
cc = __uv_call(0, (u64)uvcb);
- page_ref_unfreeze(page, expected);
+ folio_ref_unfreeze(folio, expected);
/*
- * Return -ENXIO if the page was not mapped, -EINVAL for other errors.
+ * Return -ENXIO if the folio was not mapped, -EINVAL for other errors.
* If busy or partially completed, return -EAGAIN.
*/
if (cc == UVC_CC_OK)
@@ -277,7 +277,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
bool local_drain = false;
spinlock_t *ptelock;
unsigned long uaddr;
- struct page *page;
+ struct folio *folio;
pte_t *ptep;
int rc;

@@ -306,33 +306,49 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
if (!ptep)
goto out;
if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- page = pte_page(*ptep);
+ folio = page_folio(pte_page(*ptep));
rc = -EAGAIN;
- if (trylock_page(page)) {
+
+ /* We might get PTE-mapped large folios; split them first. */
+ if (folio_test_large(folio)) {
+ rc = -E2BIG;
+ } else if (folio_trylock(folio)) {
if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(page_to_phys(page));
- rc = make_page_secure(page, uvcb);
- unlock_page(page);
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
}

/*
- * Once we drop the PTL, the page may get unmapped and
+ * Once we drop the PTL, the folio may get unmapped and
* freed immediately. We need a temporary reference.
*/
- if (rc == -EAGAIN)
- get_page(page);
+ if (rc == -EAGAIN || rc == -E2BIG)
+ folio_get(folio);
}
pte_unmap_unlock(ptep, ptelock);
out:
mmap_read_unlock(gmap->mm);

+ if (rc == -E2BIG) {
+ /*
+ * Splitting might fail with -EBUSY due to unexpected folio
+ * references, just like make_folio_secure(). So handle it
+ * ahead of time without the PTL being held.
+ */
+ folio_lock(folio);
+ rc = split_folio(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+
if (rc == -EAGAIN) {
/*
* If we are here because the UVC returned busy or partial
* completion, this is just a useless check, but it is safe.
*/
- wait_on_page_writeback(page);
- put_page(page);
+ folio_wait_writeback(folio);
+ folio_put(folio);
} else if (rc == -EBUSY) {
/*
* If we have tried a local drain and the page refcount
--
2.44.0