Re: [PATCH v2 13/25] fs: Add zero_user_large

From: Matthew Wilcox
Date: Tue Feb 18 2020 - 11:13:52 EST


On Tue, Feb 18, 2020 at 05:16:34PM +0300, Kirill A. Shutemov wrote:
> > + if (start1 >= PAGE_SIZE) {
> > + start1 -= PAGE_SIZE;
> > + end1 -= PAGE_SIZE;
> > + if (start2) {
> > + start2 -= PAGE_SIZE;
> > + end2 -= PAGE_SIZE;
> > + }
>
> You assume start2/end2 is always after start1/end1 in the page.
> Is it always true? If so, I would add BUG_ON() for it.

after or zero. Yes, I should add a BUG_ON to check for that.

> Otherwise, looks good.

Here's what I currently have (I'll add the BUG_ON later):

commit 7fabe16755365cdc6e80343ef994843ecebde60a
Author: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
Date: Sat Feb 1 03:38:49 2020 -0500

fs: Support THPs in zero_user_segments

We can only kmap() one subpage of a THP at a time, so loop over all
relevant subpages, skipping ones which don't need to be zeroed. This is
too large to inline when THPs are enabled and we actually need highmem,
so put it in highmem.c.

Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index ea5cdbd8c2c3..74614903619d 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -215,13 +215,18 @@ static inline void clear_highpage(struct page *page)
kunmap_atomic(kaddr);
}

+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
+ unsigned start2, unsigned end2);
+#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
static inline void zero_user_segments(struct page *page,
- unsigned start1, unsigned end1,
- unsigned start2, unsigned end2)
+ unsigned start1, unsigned end1,
+ unsigned start2, unsigned end2)
{
+ unsigned long i;
void *kaddr = kmap_atomic(page);

- BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
+ BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));

if (end1 > start1)
memset(kaddr + start1, 0, end1 - start1);
@@ -230,8 +235,10 @@ static inline void zero_user_segments(struct page *page,
memset(kaddr + start2, 0, end2 - start2);

kunmap_atomic(kaddr);
- flush_dcache_page(page);
+ for (i = 0; i < hpage_nr_pages(page); i++)
+ flush_dcache_page(page + i);
}
+#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */

static inline void zero_user_segment(struct page *page,
unsigned start, unsigned end)
diff --git a/mm/highmem.c b/mm/highmem.c
index 64d8dea47dd1..3a85c66ef532 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -367,9 +367,67 @@ void kunmap_high(struct page *page)
if (need_wakeup)
wake_up(pkmap_map_wait);
}
-
EXPORT_SYMBOL(kunmap_high);
-#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
+ unsigned start2, unsigned end2)
+{
+ unsigned int i;
+
+ BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));
+
+ for (i = 0; i < hpage_nr_pages(page); i++) {
+ void *kaddr;
+ unsigned this_end;
+
+ if (end1 == 0 && start2 >= PAGE_SIZE) {
+ start2 -= PAGE_SIZE;
+ end2 -= PAGE_SIZE;
+ continue;
+ }
+
+ if (start1 >= PAGE_SIZE) {
+ start1 -= PAGE_SIZE;
+ end1 -= PAGE_SIZE;
+ if (start2) {
+ start2 -= PAGE_SIZE;
+ end2 -= PAGE_SIZE;
+ }
+ continue;
+ }
+
+ kaddr = kmap_atomic(page + i);
+
+ this_end = min_t(unsigned, end1, PAGE_SIZE);
+ if (end1 > start1)
+ memset(kaddr + start1, 0, this_end - start1);
+ end1 -= this_end;
+ start1 = 0;
+
+ if (start2 >= PAGE_SIZE) {
+ start2 -= PAGE_SIZE;
+ end2 -= PAGE_SIZE;
+ } else {
+ this_end = min_t(unsigned, end2, PAGE_SIZE);
+ if (end2 > start2)
+ memset(kaddr + start2, 0, this_end - start2);
+ end2 -= this_end;
+ start2 = 0;
+ }
+
+ kunmap_atomic(kaddr);
+ flush_dcache_page(page + i);
+
+ if (!end1 && !end2)
+ break;
+ }
+
+ BUG_ON((start1 | start2 | end1 | end2) != 0);
+}
+EXPORT_SYMBOL(zero_user_segments);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_HIGHMEM */

#if defined(HASHED_PAGE_VIRTUAL)




> > + continue;
> > + }
> > +
> > + kaddr = kmap_atomic(page + i);
> > +
> > + this_end = min_t(unsigned, end1, PAGE_SIZE);
> > + if (end1 > start1)
> > + memset(kaddr + start1, 0, this_end - start1);
> > + end1 -= this_end;
> > + start1 = 0;
> > +
> > + if (start2 >= PAGE_SIZE) {
> > + start2 -= PAGE_SIZE;
> > + end2 -= PAGE_SIZE;
> > + } else {
> > + this_end = min_t(unsigned, end2, PAGE_SIZE);
> > + if (end2 > start2)
> > + memset(kaddr + start2, 0, this_end - start2);
> > + end2 -= this_end;
> > + start2 = 0;
> > + }
> > +
> > + kunmap_atomic(kaddr);
> > + flush_dcache_page(page + i);
> > +
> > + if (!end1 && !end2)
> > + break;
> > + }
> > +
> > + BUG_ON((start1 | start2 | end1 | end2) != 0);
> > }
> >
> > I think at this point it has to move out-of-line too.
> >
> > > > +static inline void zero_user_large(struct page *page,
> > > > + unsigned start, unsigned size)
> > > > +{
> > > > + unsigned int i;
> > > > +
> > > > + for (i = 0; i < thp_order(page); i++) {
> > > > + if (start > PAGE_SIZE) {
> > >
> > > Off-by-one? >= ?
> >
> > Good catch; I'd also noticed that when I came to redo the zero_user_segments().
> >
>
> --
> Kirill A. Shutemov