Re: [PATCH v2 08/23] ceph: Convert ceph_writepages_start() to use filemap_get_folios_tag()

From: Jeff Layton
Date: Fri Sep 30 2022 - 12:25:37 EST


On Mon, 2022-09-12 at 11:22 -0700, Vishal Moola (Oracle) wrote:
> Convert function to use folios throughout. This is in preparation for
> the removal of find_get_pages_range_tag().
>
> This change does NOT support large folios. This shouldn't be an issue as
> of now since ceph only utilizes folios of size 1 anyways, and there is a
> lot of work to be done on ceph conversions to folios for later patches
> at some point.
>
> Also some minor renaming for consistency.
>
> Signed-off-by: Vishal Moola (Oracle) <vishal.moola@xxxxxxxxx>
> ---
> fs/ceph/addr.c | 138 +++++++++++++++++++++++++------------------------
> 1 file changed, 70 insertions(+), 68 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index dcf701b05cc1..33dbe55b08be 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -792,7 +792,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> struct ceph_vino vino = ceph_vino(inode);
> pgoff_t index, start_index, end = -1;
> struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
> - struct pagevec pvec;
> + struct folio_batch fbatch;
> int rc = 0;
> unsigned int wsize = i_blocksize(inode);
> struct ceph_osd_request *req = NULL;
> @@ -821,7 +821,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> if (fsc->mount_options->wsize < wsize)
> wsize = fsc->mount_options->wsize;
>
> - pagevec_init(&pvec);
> + folio_batch_init(&fbatch);
>
> start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
> index = start_index;
> @@ -869,9 +869,9 @@ static int ceph_writepages_start(struct address_space *mapping,
>
> while (!done && index <= end) {
> int num_ops = 0, op_idx;
> - unsigned i, pvec_pages, max_pages, locked_pages = 0;
> + unsigned i, nr_folios, max_pages, locked_pages = 0;
> struct page **pages = NULL, **data_pages;
> - struct page *page;
> + struct folio *folio;
> pgoff_t strip_unit_end = 0;
> u64 offset = 0, len = 0;
> bool from_pool = false;
> @@ -879,28 +879,28 @@ static int ceph_writepages_start(struct address_space *mapping,
> max_pages = wsize >> PAGE_SHIFT;
>
> get_more_pages:
> - pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
> - end, PAGECACHE_TAG_DIRTY);
> - dout("pagevec_lookup_range_tag got %d\n", pvec_pages);
> - if (!pvec_pages && !locked_pages)
> + nr_folios = filemap_get_folios_tag(mapping, &index,
> + end, PAGECACHE_TAG_DIRTY, &fbatch);
> + dout("filemap_get_folios_tag got %d\n", nr_folios);
> + if (!nr_folios && !locked_pages)
> break;
> - for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
> - page = pvec.pages[i];
> - dout("? %p idx %lu\n", page, page->index);
> + for (i = 0; i < nr_folios && locked_pages < max_pages; i++) {
> + folio = fbatch.folios[i];
> + dout("? %p idx %lu\n", folio, folio->index);
> if (locked_pages == 0)
> - lock_page(page); /* first page */
> - else if (!trylock_page(page))
> + folio_lock(folio); /* first folio */
> + else if (!folio_trylock(folio))
> break;
>
> /* only dirty pages, or our accounting breaks */
> - if (unlikely(!PageDirty(page)) ||
> - unlikely(page->mapping != mapping)) {
> - dout("!dirty or !mapping %p\n", page);
> - unlock_page(page);
> + if (unlikely(!folio_test_dirty(folio)) ||
> + unlikely(folio->mapping != mapping)) {
> + dout("!dirty or !mapping %p\n", folio);
> + folio_unlock(folio);
> continue;
> }
> /* only if matching snap context */
> - pgsnapc = page_snap_context(page);
> + pgsnapc = page_snap_context(&folio->page);
> if (pgsnapc != snapc) {
> dout("page snapc %p %lld != oldest %p %lld\n",
> pgsnapc, pgsnapc->seq, snapc, snapc->seq);
> @@ -908,11 +908,10 @@ static int ceph_writepages_start(struct address_space *mapping,
> !ceph_wbc.head_snapc &&
> wbc->sync_mode != WB_SYNC_NONE)
> should_loop = true;
> - unlock_page(page);
> + folio_unlock(folio);
> continue;
> }
> - if (page_offset(page) >= ceph_wbc.i_size) {
> - struct folio *folio = page_folio(page);
> + if (folio_pos(folio) >= ceph_wbc.i_size) {
>
> dout("folio at %lu beyond eof %llu\n",
> folio->index, ceph_wbc.i_size);
> @@ -924,25 +923,26 @@ static int ceph_writepages_start(struct address_space *mapping,
> folio_unlock(folio);
> continue;
> }
> - if (strip_unit_end && (page->index > strip_unit_end)) {
> - dout("end of strip unit %p\n", page);
> - unlock_page(page);
> + if (strip_unit_end && (folio->index > strip_unit_end)) {
> + dout("end of strip unit %p\n", folio);
> + folio_unlock(folio);
> break;
> }
> - if (PageWriteback(page) || PageFsCache(page)) {
> + if (folio_test_writeback(folio) ||
> + folio_test_fscache(folio)) {
> if (wbc->sync_mode == WB_SYNC_NONE) {
> - dout("%p under writeback\n", page);
> - unlock_page(page);
> + dout("%p under writeback\n", folio);
> + folio_unlock(folio);
> continue;
> }
> - dout("waiting on writeback %p\n", page);
> - wait_on_page_writeback(page);
> - wait_on_page_fscache(page);
> + dout("waiting on writeback %p\n", folio);
> + folio_wait_writeback(folio);
> + folio_wait_fscache(folio);
> }
>
> - if (!clear_page_dirty_for_io(page)) {
> - dout("%p !clear_page_dirty_for_io\n", page);
> - unlock_page(page);
> + if (!folio_clear_dirty_for_io(folio)) {
> + dout("%p !clear_page_dirty_for_io\n", folio);
> + folio_unlock(folio);
> continue;
> }
>
> @@ -958,7 +958,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> u32 xlen;
>
> /* prepare async write request */
> - offset = (u64)page_offset(page);
> + offset = (u64)folio_pos(folio);
> ceph_calc_file_object_mapping(&ci->i_layout,
> offset, wsize,
> &objnum, &objoff,
> @@ -966,7 +966,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> len = xlen;
>
> num_ops = 1;
> - strip_unit_end = page->index +
> + strip_unit_end = folio->index +
> ((len - 1) >> PAGE_SHIFT);
>
> BUG_ON(pages);
> @@ -981,54 +981,53 @@ static int ceph_writepages_start(struct address_space *mapping,
> }
>
> len = 0;
> - } else if (page->index !=
> + } else if (folio->index !=
> (offset + len) >> PAGE_SHIFT) {
> if (num_ops >= (from_pool ? CEPH_OSD_SLAB_OPS :
> CEPH_OSD_MAX_OPS)) {
> - redirty_page_for_writepage(wbc, page);
> - unlock_page(page);
> + folio_redirty_for_writepage(wbc, folio);
> + folio_unlock(folio);
> break;
> }
>
> num_ops++;
> - offset = (u64)page_offset(page);
> + offset = (u64)folio_pos(folio);
> len = 0;
> }
>
> - /* note position of first page in pvec */
> + /* note position of first page in fbatch */
> dout("%p will write page %p idx %lu\n",
> - inode, page, page->index);
> + inode, folio, folio->index);
>
> if (atomic_long_inc_return(&fsc->writeback_count) >
> CONGESTION_ON_THRESH(
> fsc->mount_options->congestion_kb))
> fsc->write_congested = true;
>
> - pages[locked_pages++] = page;
> - pvec.pages[i] = NULL;
> + pages[locked_pages++] = &folio->page;
> + fbatch.folios[i] = NULL;
>
> - len += thp_size(page);
> + len += folio_size(folio);
> }
>
> /* did we get anything? */
> if (!locked_pages)
> - goto release_pvec_pages;
> + goto release_folio_batches;
> if (i) {
> unsigned j, n = 0;
> - /* shift unused page to beginning of pvec */
> - for (j = 0; j < pvec_pages; j++) {
> - if (!pvec.pages[j])
> + /* shift unused folio to the beginning of fbatch */
> + for (j = 0; j < nr_folios; j++) {
> + if (!fbatch.folios[j])
> continue;
> if (n < j)
> - pvec.pages[n] = pvec.pages[j];
> + fbatch.folios[n] = fbatch.folios[j];
> n++;
> }
> - pvec.nr = n;
> -
> - if (pvec_pages && i == pvec_pages &&
> + fbatch.nr = n;
> + if (nr_folios && i == nr_folios &&
> locked_pages < max_pages) {
> - dout("reached end pvec, trying for more\n");
> - pagevec_release(&pvec);
> + dout("reached end of fbatch, trying for more\n");
> + folio_batch_release(&fbatch);
> goto get_more_pages;
> }
> }
> @@ -1056,7 +1055,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> BUG_ON(IS_ERR(req));
> }
> BUG_ON(len < page_offset(pages[locked_pages - 1]) +
> - thp_size(page) - offset);
> + folio_size(folio) - offset);
>
> req->r_callback = writepages_finish;
> req->r_inode = inode;
> @@ -1098,7 +1097,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> set_page_writeback(pages[i]);
> if (caching)
> ceph_set_page_fscache(pages[i]);
> - len += thp_size(page);
> + len += folio_size(folio);
> }
> ceph_fscache_write_to_cache(inode, offset, len, caching);
>
> @@ -1108,7 +1107,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> /* writepages_finish() clears writeback pages
> * according to the data length, so make sure
> * data length covers all locked pages */
> - u64 min_len = len + 1 - thp_size(page);
> + u64 min_len = len + 1 - folio_size(folio);
> len = get_writepages_data_length(inode, pages[i - 1],
> offset);
> len = max(len, min_len);
> @@ -1164,10 +1163,10 @@ static int ceph_writepages_start(struct address_space *mapping,
> if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
> done = true;
>
> -release_pvec_pages:
> - dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
> - pvec.nr ? pvec.pages[0] : NULL);
> - pagevec_release(&pvec);
> +release_folio_batches:
> + dout("folio_batch_release on %d batches (%p)", (int) fbatch.nr,
> + fbatch.nr ? fbatch.folios[0] : NULL);
> + folio_batch_release(&fbatch);
> }
>
> if (should_loop && !done) {
> @@ -1180,19 +1179,22 @@ static int ceph_writepages_start(struct address_space *mapping,
> if (wbc->sync_mode != WB_SYNC_NONE &&
> start_index == 0 && /* all dirty pages were checked */
> !ceph_wbc.head_snapc) {
> - struct page *page;
> + struct folio *folio;
> unsigned i, nr;
> index = 0;
> while ((index <= end) &&
> - (nr = pagevec_lookup_tag(&pvec, mapping, &index,
> - PAGECACHE_TAG_WRITEBACK))) {
> + (nr = filemap_get_folios_tag(mapping, &index,
> + (pgoff_t)-1,
> + PAGECACHE_TAG_WRITEBACK,
> + &fbatch))) {
> for (i = 0; i < nr; i++) {
> - page = pvec.pages[i];
> - if (page_snap_context(page) != snapc)
> + folio = fbatch.folios[i];
> + if (page_snap_context(&folio->page) !=
> + snapc)
> continue;
> - wait_on_page_writeback(page);
> + folio_wait_writeback(folio);
> }
> - pagevec_release(&pvec);
> + folio_batch_release(&fbatch);
> cond_resched();
> }
> }


We have some work in progress to add write helpers to netfslib. Once we
get those in place, we plan to convert ceph to use them. At that point
ceph_writepages just goes away.

I think it'd be best to just wait for that and to just ensure that
netfslib uses filemap_get_folios_tag and the like where appropriate.
--
Jeff Layton <jlayton@xxxxxxxxxx>