Re: [PATCH 07/12] erofs: Convert uncompressed files from readpages to readahead

From: Gao Xiang
Date: Fri Jan 24 2020 - 20:54:36 EST


Hi Matthew,

On Fri, Jan 24, 2020 at 05:35:48PM -0800, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>
>
> Use the new readahead operation in erofs. Fix what I believe to be
> a refcounting bug in the error case.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> Cc: linux-erofs@xxxxxxxxxxxxxxxx
> ---
> fs/erofs/data.c | 34 ++++++++++++++--------------------
> fs/erofs/zdata.c | 2 +-
> include/trace/events/erofs.h | 6 +++---
> 3 files changed, 18 insertions(+), 24 deletions(-)
>
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index fc3a8d8064f8..335c1ab05312 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -280,42 +280,36 @@ static int erofs_raw_access_readpage(struct file *file, struct page *page)
> return 0;
> }
>
> -static int erofs_raw_access_readpages(struct file *filp,
> +static unsigned erofs_raw_access_readahead(struct file *file,
> struct address_space *mapping,
> - struct list_head *pages,
> + pgoff_t start,
> unsigned int nr_pages)
> {
> erofs_off_t last_block;
> struct bio *bio = NULL;
> - gfp_t gfp = readahead_gfp_mask(mapping);
> - struct page *page = list_last_entry(pages, struct page, lru);
>
> - trace_erofs_readpages(mapping->host, page, nr_pages, true);
> + trace_erofs_readpages(mapping->host, start, nr_pages, true);
>
> for (; nr_pages; --nr_pages) {
> - page = list_entry(pages->prev, struct page, lru);
> + struct page *page = readahead_page(mapping, start++);
>
> prefetchw(&page->flags);
> - list_del(&page->lru);
>
> - if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
> - bio = erofs_read_raw_page(bio, mapping, page,
> - &last_block, nr_pages, true);
> + bio = erofs_read_raw_page(bio, mapping, page, &last_block,
> + nr_pages, true);
>
> - /* all the page errors are ignored when readahead */
> - if (IS_ERR(bio)) {
> - pr_err("%s, readahead error at page %lu of nid %llu\n",
> - __func__, page->index,
> - EROFS_I(mapping->host)->nid);
> + /* all the page errors are ignored when readahead */
> + if (IS_ERR(bio)) {
> + pr_err("%s, readahead error at page %lu of nid %llu\n",
> + __func__, page->index,
> + EROFS_I(mapping->host)->nid);
>
> - bio = NULL;
> - }
> + bio = NULL;
> + put_page(page);

Out of curiosity, a small question: why do we need put_page(page) twice
if erofs_read_raw_page returns an error?

One put_page(page) is for the temporary reference count taken for this request;
we can put_page(page) in advance since pages are still locked before endio.

The other put_page(page) is for the page cache xarray reference. I think in this
case the page has already been successfully inserted into the page cache anyway,
and after erroring out it will trigger .readpage again... so we probably need to
keep this refcount for the page cache xarray?

If I'm missing something, kindly correct me.

Thanks,
Gao Xiang

> }
>
> - /* pages could still be locked */
> put_page(page);
> }
> - DBG_BUGON(!list_empty(pages));
>
> /* the rare case (end in gaps) */
> if (bio)
> @@ -358,7 +352,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
> /* for uncompressed (aligned) files and raw access for other files */
> const struct address_space_operations erofs_raw_access_aops = {
> .readpage = erofs_raw_access_readpage,
> - .readpages = erofs_raw_access_readpages,
> + .readahead = erofs_raw_access_readahead,
> .bmap = erofs_bmap,
> };
>
> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> index ca99425a4536..d3dd8cf1fc01 100644
> --- a/fs/erofs/zdata.c
> +++ b/fs/erofs/zdata.c
> @@ -1340,7 +1340,7 @@ static int z_erofs_readpages(struct file *filp, struct address_space *mapping,
> struct page *head = NULL;
> LIST_HEAD(pagepool);
>
> - trace_erofs_readpages(mapping->host, lru_to_page(pages),
> + trace_erofs_readpages(mapping->host, lru_to_page(pages)->index,
> nr_pages, false);
>
> f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT;
> diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
> index 27f5caa6299a..bf9806fd1306 100644
> --- a/include/trace/events/erofs.h
> +++ b/include/trace/events/erofs.h
> @@ -113,10 +113,10 @@ TRACE_EVENT(erofs_readpage,
>
> TRACE_EVENT(erofs_readpages,
>
> - TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage,
> + TP_PROTO(struct inode *inode, pgoff_t start, unsigned int nrpage,
> bool raw),
>
> - TP_ARGS(inode, page, nrpage, raw),
> + TP_ARGS(inode, start, nrpage, raw),
>
> TP_STRUCT__entry(
> __field(dev_t, dev )
> @@ -129,7 +129,7 @@ TRACE_EVENT(erofs_readpages,
> TP_fast_assign(
> __entry->dev = inode->i_sb->s_dev;
> __entry->nid = EROFS_I(inode)->nid;
> - __entry->start = page->index;
> + __entry->start = start;
> __entry->nrpage = nrpage;
> __entry->raw = raw;
> ),
> --
> 2.24.1
>