[PATCH 2/2] mm: Real pagecache iterators
From: Kent Overstreet
Date: Thu Mar 31 2016 - 22:38:26 EST
Introduce for_each_pagecache_page() and related macros, with the goal of
replacing most/all uses of pagevec_lookup().
For the most part this shouldn't be a functional change. The one functional
difference with the new macros is that they now take an @end parameter, so we're
able to avoid grabbing pages in __find_get_pages() that we'll never use.
This patch only does some of the conversions - the ones I was able to easily
test myself. The conversions are mechanical but tricky enough that they
generally warrant testing.
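
The basic conversion pattern, as a sketch (the early-exit branch here is
illustrative, not from any one conversion - the one rule is that
pagecache_iter_release() must be called when breaking out of the loop early,
but not when the loop terminates normally):

	struct pagecache_iter iter;
	struct page *page;

	for_each_pagecache_tag(&iter, mapping, PAGECACHE_TAG_WRITEBACK,
			       start, end, page) {
		if (fatal_signal_pending(current)) {
			/* breaking out early: drop the held page refs */
			pagecache_iter_release(&iter);
			return -EINTR;
		}
		wait_on_page_writeback(page);
	}
	/* loop ran to completion: no release needed */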
Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
---
fs/ext4/inode.c | 261 ++++++++++++++++++++----------------------------
include/linux/pagevec.h | 67 ++++++++++++-
mm/filemap.c | 76 +++++++++-----
mm/page-writeback.c | 148 +++++++++++----------------
mm/swap.c | 33 +-----
mm/truncate.c | 259 +++++++++++++++++------------------------------
6 files changed, 380 insertions(+), 464 deletions(-)
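
Note on the _entry variants: like find_get_entries(), they can return
exceptional (shadow) entries as well as pages, so callers are expected to
filter those out - e.g. the pattern used by the mm/truncate.c conversions
below (sketch):

	for_each_pagecache_entry(&iter, mapping, start, end, page, index) {
		if (radix_tree_exceptional_entry(page)) {
			clear_exceptional_entry(mapping, index, page);
			continue;
		}
		/* a real page, with a reference held */
		if (!trylock_page(page))
			continue;
		truncate_inode_page(mapping, page);
		unlock_page(page);
	}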
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index dab84a2530..c4d73f67b5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1605,11 +1605,10 @@ struct mpage_da_data {
static void mpage_release_unused_pages(struct mpage_da_data *mpd,
bool invalidate)
{
- int nr_pages, i;
pgoff_t index, end;
- struct pagevec pvec;
+ struct pagecache_iter iter;
+ struct page *page;
struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;
/* This is necessary when next_page == 0. */
if (mpd->first_page >= mpd->next_page)
@@ -1624,25 +1623,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
ext4_es_remove_extent(inode, start, last - start + 1);
}
- pagevec_init(&pvec, 0);
- while (index <= end) {
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
- if (page->index > end)
- break;
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
- if (invalidate) {
- block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
- ClearPageUptodate(page);
- }
- unlock_page(page);
+ for_each_pagecache_page(&iter, inode->i_mapping, index, end, page) {
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+ if (invalidate) {
+ block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+ ClearPageUptodate(page);
}
- index = pvec.pages[nr_pages - 1]->index + 1;
- pagevec_release(&pvec);
+ unlock_page(page);
}
}
@@ -2209,8 +2197,8 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
*/
static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
{
- struct pagevec pvec;
- int nr_pages, i;
+ struct pagecache_iter iter;
+ struct page *page;
struct inode *inode = mpd->inode;
struct buffer_head *head, *bh;
int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
@@ -2224,67 +2212,55 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
lblk = start << bpp_bits;
pblock = mpd->map.m_pblk;
- pagevec_init(&pvec, 0);
- while (start <= end) {
- nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
- PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- if (page->index > end)
- break;
- /* Up to 'end' pages must be contiguous */
- BUG_ON(page->index != start);
- bh = head = page_buffers(page);
- do {
- if (lblk < mpd->map.m_lblk)
- continue;
- if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
- /*
- * Buffer after end of mapped extent.
- * Find next buffer in the page to map.
- */
- mpd->map.m_len = 0;
- mpd->map.m_flags = 0;
- /*
- * FIXME: If dioread_nolock supports
- * blocksize < pagesize, we need to make
- * sure we add size mapped so far to
- * io_end->size as the following call
- * can submit the page for IO.
- */
- err = mpage_process_page_bufs(mpd, head,
- bh, lblk);
- pagevec_release(&pvec);
- if (err > 0)
- err = 0;
- return err;
- }
- if (buffer_delay(bh)) {
- clear_buffer_delay(bh);
- bh->b_blocknr = pblock++;
- }
- clear_buffer_unwritten(bh);
- } while (lblk++, (bh = bh->b_this_page) != head);
-
- /*
- * FIXME: This is going to break if dioread_nolock
- * supports blocksize < pagesize as we will try to
- * convert potentially unmapped parts of inode.
- */
- mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
- /* Page fully mapped - let IO run! */
- err = mpage_submit_page(mpd, page);
- if (err < 0) {
- pagevec_release(&pvec);
+ for_each_pagecache_page(&iter, inode->i_mapping, start, end, page) {
+ /* Up to 'end' pages must be contiguous */
+ BUG_ON(page->index != start);
+ bh = head = page_buffers(page);
+ do {
+ if (lblk < mpd->map.m_lblk)
+ continue;
+ if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
+ /*
+ * Buffer after end of mapped extent. Find next
+ * buffer in the page to map.
+ */
+ mpd->map.m_len = 0;
+ mpd->map.m_flags = 0;
+ /*
+ * FIXME: If dioread_nolock supports blocksize <
+ * pagesize, we need to make sure we add size
+ * mapped so far to io_end->size as the
+ * following call can submit the page for IO.
+ */
+ err = mpage_process_page_bufs(mpd, head,
+ bh, lblk);
+ pagecache_iter_release(&iter);
+ if (err > 0)
+ err = 0;
return err;
}
- start++;
+ if (buffer_delay(bh)) {
+ clear_buffer_delay(bh);
+ bh->b_blocknr = pblock++;
+ }
+ clear_buffer_unwritten(bh);
+ } while (lblk++, (bh = bh->b_this_page) != head);
+
+ /*
+ * FIXME: This is going to break if dioread_nolock supports
+ * blocksize < pagesize as we will try to convert potentially
+ * unmapped parts of inode.
+ */
+ mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
+ /* Page fully mapped - let IO run! */
+ err = mpage_submit_page(mpd, page);
+ if (err < 0) {
+ pagecache_iter_release(&iter);
+ return err;
}
- pagevec_release(&pvec);
+ start++;
}
+
/* Extent fully mapped and matches with page boundary. We are done. */
mpd->map.m_len = 0;
mpd->map.m_flags = 0;
@@ -2485,13 +2461,10 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
{
struct address_space *mapping = mpd->inode->i_mapping;
- struct pagevec pvec;
- unsigned int nr_pages;
+ struct pagecache_iter iter;
+ struct page *page;
long left = mpd->wbc->nr_to_write;
- pgoff_t index = mpd->first_page;
- pgoff_t end = mpd->last_page;
- int tag;
- int i, err = 0;
+ int tag, err = 0;
int blkbits = mpd->inode->i_blkbits;
ext4_lblk_t lblk;
struct buffer_head *head;
@@ -2501,81 +2474,59 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
else
tag = PAGECACHE_TAG_DIRTY;
- pagevec_init(&pvec, 0);
mpd->map.m_len = 0;
- mpd->next_page = index;
- while (index <= end) {
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
- goto out;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ mpd->next_page = mpd->first_page;
- /*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end)
- goto out;
-
- /*
- * Accumulated enough dirty pages? This doesn't apply
- * to WB_SYNC_ALL mode. For integrity sync we have to
- * keep going because someone may be concurrently
- * dirtying pages, and we might have synced a lot of
- * newly appeared dirty pages, but have not synced all
- * of the old dirty pages.
- */
- if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0)
- goto out;
-
- /* If we can't merge this page, we are done. */
- if (mpd->map.m_len > 0 && mpd->next_page != page->index)
- goto out;
+ for_each_pagecache_tag(&iter, mapping, tag, mpd->first_page,
+ mpd->last_page, page) {
+ /*
+ * Accumulated enough dirty pages? This doesn't apply to
+ * WB_SYNC_ALL mode. For integrity sync we have to keep going
+ * because someone may be concurrently dirtying pages, and we
+ * might have synced a lot of newly appeared dirty pages, but
+ * have not synced all of the old dirty pages.
+ */
+ if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0)
+ break;
- lock_page(page);
- /*
- * If the page is no longer dirty, or its mapping no
- * longer corresponds to inode we are writing (which
- * means it has been truncated or invalidated), or the
- * page is already under writeback and we are not doing
- * a data integrity writeback, skip the page
- */
- if (!PageDirty(page) ||
- (PageWriteback(page) &&
- (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
- unlikely(page->mapping != mapping)) {
- unlock_page(page);
- continue;
- }
+ /* If we can't merge this page, we are done. */
+ if (mpd->map.m_len > 0 && mpd->next_page != page->index)
+ break;
- wait_on_page_writeback(page);
- BUG_ON(PageWriteback(page));
-
- if (mpd->map.m_len == 0)
- mpd->first_page = page->index;
- mpd->next_page = page->index + 1;
- /* Add all dirty buffers to mpd */
- lblk = ((ext4_lblk_t)page->index) <<
- (PAGE_CACHE_SHIFT - blkbits);
- head = page_buffers(page);
- err = mpage_process_page_bufs(mpd, head, head, lblk);
- if (err <= 0)
- goto out;
- err = 0;
- left--;
+ lock_page(page);
+ /*
+ * If the page is no longer dirty, or its mapping no longer
+ * corresponds to inode we are writing (which means it has been
+ * truncated or invalidated), or the page is already under
+ * writeback and we are not doing a data integrity writeback,
+ * skip the page
+ */
+ if (!PageDirty(page) ||
+ (PageWriteback(page) &&
+ (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
+ unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ continue;
}
- pagevec_release(&pvec);
- cond_resched();
+
+ wait_on_page_writeback(page);
+ BUG_ON(PageWriteback(page));
+
+ if (mpd->map.m_len == 0)
+ mpd->first_page = page->index;
+ mpd->next_page = page->index + 1;
+ /* Add all dirty buffers to mpd */
+ lblk = ((ext4_lblk_t)page->index) <<
+ (PAGE_CACHE_SHIFT - blkbits);
+ head = page_buffers(page);
+ err = mpage_process_page_bufs(mpd, head, head, lblk);
+ if (err <= 0)
+ break;
+ err = 0;
+ left--;
}
- return 0;
-out:
- pagevec_release(&pvec);
+ pagecache_iter_release(&iter);
+
return err;
}
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index b45d391b45..e60d74148d 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -22,10 +22,6 @@ struct pagevec {
void __pagevec_release(struct pagevec *pvec);
void __pagevec_lru_add(struct pagevec *pvec);
-unsigned pagevec_lookup_entries(struct pagevec *pvec,
- struct address_space *mapping,
- pgoff_t start, unsigned nr_entries,
- pgoff_t *indices);
void pagevec_remove_exceptionals(struct pagevec *pvec);
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
pgoff_t start, unsigned nr_pages);
@@ -69,4 +65,67 @@ static inline void pagevec_release(struct pagevec *pvec)
__pagevec_release(pvec);
}
+struct pagecache_iter {
+ unsigned nr;
+ unsigned idx;
+ pgoff_t index;
+ struct page *pages[PAGEVEC_SIZE];
+ pgoff_t indices[PAGEVEC_SIZE];
+};
+
+static inline void pagecache_iter_init(struct pagecache_iter *iter,
+ pgoff_t start)
+{
+ iter->nr = 0;
+ iter->idx = 0;
+ iter->index = start;
+}
+
+void __pagecache_iter_release(struct pagecache_iter *iter);
+
+/**
+ * pagecache_iter_release - release cached pages from pagecache_iter
+ *
+ * Must be called when breaking out of for_each_pagecache_page() etc. early - not
+ * needed if pagecache_iter_next() returned NULL and the loop terminated normally
+ */
+static inline void pagecache_iter_release(struct pagecache_iter *iter)
+{
+ if (iter->nr)
+ __pagecache_iter_release(iter);
+}
+
+struct page *pagecache_iter_next(struct pagecache_iter *iter,
+ struct address_space *mapping,
+ pgoff_t end, pgoff_t *index,
+ unsigned flags);
+
+#define __pagecache_iter_for_each(_iter, _mapping, _start, _end, \
+ _page, _index, _flags) \
+ for (pagecache_iter_init((_iter), (_start)); \
+ ((_page) = pagecache_iter_next((_iter), (_mapping), \
+ (_end), (_index), (_flags)));)
+
+#define for_each_pagecache_page(_iter, _mapping, _start, _end, _page) \
+ __pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+ (_page), NULL, 0)
+
+#define for_each_pagecache_page_contig(_iter, _mapping, _start, _end, _page)\
+ __pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+ (_page), NULL, RADIX_TREE_ITER_CONTIG)
+
+#define for_each_pagecache_tag(_iter, _mapping, _tag, _start, _end, _page)\
+ __pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+ (_page), NULL, RADIX_TREE_ITER_TAGGED|(_tag))
+
+#define for_each_pagecache_entry(_iter, _mapping, _start, _end, _page, _index)\
+ __pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+ (_page), &(_index), RADIX_TREE_ITER_EXCEPTIONAL)
+
+#define for_each_pagecache_entry_tag(_iter, _mapping, _tag, \
+ _start, _end, _page, _index) \
+ __pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+ (_page), &(_index), RADIX_TREE_ITER_EXCEPTIONAL|\
+ RADIX_TREE_ITER_TAGGED|(_tag))
+
#endif /* _LINUX_PAGEVEC_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index 81ce03fbc1..11fbc97f8e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -352,35 +352,20 @@ EXPORT_SYMBOL(filemap_flush);
static int __filemap_fdatawait_range(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
- pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
+ pgoff_t start = start_byte >> PAGE_CACHE_SHIFT;
pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
- struct pagevec pvec;
- int nr_pages;
+ struct pagecache_iter iter;
+ struct page *page;
int ret = 0;
if (end_byte < start_byte)
goto out;
- pagevec_init(&pvec, 0);
- while ((index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_WRITEBACK,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
- unsigned i;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- /* until radix tree lookup accepts end_index */
- if (page->index > end)
- continue;
-
- wait_on_page_writeback(page);
- if (TestClearPageError(page))
- ret = -EIO;
- }
- pagevec_release(&pvec);
- cond_resched();
+ for_each_pagecache_tag(&iter, mapping, PAGECACHE_TAG_WRITEBACK,
+ start, end, page) {
+ wait_on_page_writeback(page);
+ if (TestClearPageError(page))
+ ret = -EIO;
}
out:
return ret;
@@ -1315,6 +1300,51 @@ no_entry:
}
EXPORT_SYMBOL(__find_get_pages);
+void __pagecache_iter_release(struct pagecache_iter *iter)
+{
+ lru_add_drain();
+ release_pages(iter->pages, iter->nr, 0);
+ iter->nr = 0;
+ iter->idx = 0;
+}
+EXPORT_SYMBOL(__pagecache_iter_release);
+
+/**
+ * pagecache_iter_next - get next page from pagecache iterator and advance
+ * iterator
+ * @iter: The iterator to advance
+ * @mapping: The address_space to search
+ * @end: Page cache index to stop at (inclusive)
+ * @index: if non-NULL, index of page or entry will be returned here
+ * @flags: radix tree iter flags and tag for __find_get_pages()
+ */
+struct page *pagecache_iter_next(struct pagecache_iter *iter,
+ struct address_space *mapping,
+ pgoff_t end, pgoff_t *index,
+ unsigned flags)
+{
+ struct page *page;
+
+ if (iter->idx >= iter->nr) {
+ pagecache_iter_release(iter);
+ cond_resched();
+
+ iter->nr = __find_get_pages(mapping, iter->index, end,
+ PAGEVEC_SIZE, iter->pages,
+ iter->indices, flags);
+ if (!iter->nr)
+ return NULL;
+ }
+
+ iter->index = iter->indices[iter->idx] + 1;
+ if (index)
+ *index = iter->indices[iter->idx];
+ page = iter->pages[iter->idx];
+ iter->idx++;
+ return page;
+}
+EXPORT_SYMBOL(pagecache_iter_next);
+
/*
* CD/DVDs are error prone. When a medium error occurs, the driver may fail
* a _large_ part of the i/o request. Imagine the worst scenario:
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 11ff8f7586..2eb2e93313 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2154,10 +2154,10 @@ int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data)
{
+ struct pagecache_iter iter;
+ struct page *page;
int ret = 0;
int done = 0;
- struct pagevec pvec;
- int nr_pages;
pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
pgoff_t end; /* Inclusive */
@@ -2166,7 +2166,6 @@ int write_cache_pages(struct address_space *mapping,
int range_whole = 0;
int tag;
- pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
@@ -2189,105 +2188,80 @@ int write_cache_pages(struct address_space *mapping,
retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
- done_index = index;
- while (!done && (index <= end)) {
- int i;
-
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- /*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end) {
- /*
- * can't be range_cyclic (1st pass) because
- * end == -1 in that case.
- */
- done = 1;
- break;
- }
+ done_index = index;
- done_index = page->index;
+ for_each_pagecache_tag(&iter, mapping, tag, index, end, page) {
+ done_index = page->index;
- lock_page(page);
+ lock_page(page);
- /*
- * Page truncated or invalidated. We can freely skip it
- * then, even for data integrity operations: the page
- * has disappeared concurrently, so there could be no
- * real expectation of this data interity operation
- * even if there is now a new, dirty page at the same
- * pagecache address.
- */
- if (unlikely(page->mapping != mapping)) {
+ /*
+ * Page truncated or invalidated. We can freely skip it
+ * then, even for data integrity operations: the page
+ * has disappeared concurrently, so there could be no
+ * real expectation of this data integrity operation
+ * even if there is now a new, dirty page at the same
+ * pagecache address.
+ */
+ if (unlikely(page->mapping != mapping)) {
continue_unlock:
- unlock_page(page);
- continue;
- }
-
- if (!PageDirty(page)) {
- /* someone wrote it for us */
- goto continue_unlock;
- }
+ unlock_page(page);
+ continue;
+ }
- if (PageWriteback(page)) {
- if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
- else
- goto continue_unlock;
- }
+ if (!PageDirty(page)) {
+ /* someone wrote it for us */
+ goto continue_unlock;
+ }
- BUG_ON(PageWriteback(page));
- if (!clear_page_dirty_for_io(page))
+ if (PageWriteback(page)) {
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+ else
goto continue_unlock;
+ }
- trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
- ret = (*writepage)(page, wbc, data);
- if (unlikely(ret)) {
- if (ret == AOP_WRITEPAGE_ACTIVATE) {
- unlock_page(page);
- ret = 0;
- } else {
- /*
- * done_index is set past this page,
- * so media errors will not choke
- * background writeout for the entire
- * file. This has consequences for
- * range_cyclic semantics (ie. it may
- * not be suitable for data integrity
- * writeout).
- */
- done_index = page->index + 1;
- done = 1;
- break;
- }
- }
+ BUG_ON(PageWriteback(page));
+ if (!clear_page_dirty_for_io(page))
+ goto continue_unlock;
- /*
- * We stop writing back only if we are not doing
- * integrity sync. In case of integrity sync we have to
- * keep going until we have written all the pages
- * we tagged for writeback prior to entering this loop.
- */
- if (--wbc->nr_to_write <= 0 &&
- wbc->sync_mode == WB_SYNC_NONE) {
+ trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
+ ret = (*writepage)(page, wbc, data);
+ if (unlikely(ret)) {
+ if (ret == AOP_WRITEPAGE_ACTIVATE) {
+ unlock_page(page);
+ ret = 0;
+ } else {
+ /*
+ * done_index is set past this page,
+ * so media errors will not choke
+ * background writeout for the entire
+ * file. This has consequences for
+ * range_cyclic semantics (ie. it may
+ * not be suitable for data integrity
+ * writeout).
+ */
+ done_index = page->index + 1;
done = 1;
break;
}
}
- pagevec_release(&pvec);
- cond_resched();
+
+ /*
+ * We stop writing back only if we are not doing
+ * integrity sync. In case of integrity sync we have to
+ * keep going until we have written all the pages
+ * we tagged for writeback prior to entering this loop.
+ */
+ if (--wbc->nr_to_write <= 0 &&
+ wbc->sync_mode == WB_SYNC_NONE) {
+ done = 1;
+ break;
+ }
}
+ pagecache_iter_release(&iter);
+
if (!cycled && !done) {
/*
* range_cyclic:
diff --git a/mm/swap.c b/mm/swap.c
index 09fe5e9771..f48cedeb1c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -718,6 +718,9 @@ void release_pages(struct page **pages, int nr, bool cold)
for (i = 0; i < nr; i++) {
struct page *page = pages[i];
+ if (radix_tree_exceptional_entry(page))
+ continue;
+
/*
* Make sure the IRQ-safe lock-holding time does not get
* excessive with a continuous string of pages from the
@@ -857,36 +860,6 @@ void __pagevec_lru_add(struct pagevec *pvec)
EXPORT_SYMBOL(__pagevec_lru_add);
/**
- * pagevec_lookup_entries - gang pagecache lookup
- * @pvec: Where the resulting entries are placed
- * @mapping: The address_space to search
- * @start: The starting entry index
- * @nr_entries: The maximum number of entries
- * @indices: The cache indices corresponding to the entries in @pvec
- *
- * pagevec_lookup_entries() will search for and return a group of up
- * to @nr_entries pages and shadow entries in the mapping. All
- * entries are placed in @pvec. pagevec_lookup_entries() takes a
- * reference against actual pages in @pvec.
- *
- * The search returns a group of mapping-contiguous entries with
- * ascending indexes. There may be holes in the indices due to
- * not-present entries.
- *
- * pagevec_lookup_entries() returns the number of entries which were
- * found.
- */
-unsigned pagevec_lookup_entries(struct pagevec *pvec,
- struct address_space *mapping,
- pgoff_t start, unsigned nr_pages,
- pgoff_t *indices)
-{
- pvec->nr = find_get_entries(mapping, start, nr_pages,
- pvec->pages, indices);
- return pagevec_count(pvec);
-}
-
-/**
* pagevec_remove_exceptionals - pagevec exceptionals pruning
* @pvec: The pagevec to prune
*
diff --git a/mm/truncate.c b/mm/truncate.c
index 7598b552ae..dca55e4b97 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -231,10 +231,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
pgoff_t end; /* exclusive */
unsigned int partial_start; /* inclusive */
unsigned int partial_end; /* exclusive */
- struct pagevec pvec;
- pgoff_t indices[PAGEVEC_SIZE];
- pgoff_t index;
- int i;
+ struct pagecache_iter iter;
+ struct page *page;
+ pgoff_t index;
+ bool found;
cleancache_invalidate_inode(mapping);
if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
@@ -250,51 +250,36 @@ void truncate_inode_pages_range(struct address_space *mapping,
* start of the range and 'partial_end' at the end of the range.
* Note that 'end' is exclusive while 'lend' is inclusive.
*/
- start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ start = round_up(lstart, PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT;
if (lend == -1)
/*
- * lend == -1 indicates end-of-file so we have to set 'end'
- * to the highest possible pgoff_t and since the type is
- * unsigned we're using -1.
+ * lend == -1 indicates end-of-file so we have to set 'end' to
+ * the highest possible pgoff_t
*/
- end = -1;
+ end = ULONG_MAX;
else
end = (lend + 1) >> PAGE_CACHE_SHIFT;
- pagevec_init(&pvec, 0);
- index = start;
- while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE),
- indices)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
-
- /* We rely upon deletion not changing page->index */
- index = indices[i];
- if (index >= end)
- break;
+ if (start >= end)
+ goto do_partial;
- if (radix_tree_exceptional_entry(page)) {
- clear_exceptional_entry(mapping, index, page);
- continue;
- }
+ for_each_pagecache_entry(&iter, mapping, start, end - 1, page, index) {
+ if (radix_tree_exceptional_entry(page)) {
+ clear_exceptional_entry(mapping, index, page);
+ continue;
+ }
- if (!trylock_page(page))
- continue;
- WARN_ON(page->index != index);
- if (PageWriteback(page)) {
- unlock_page(page);
- continue;
- }
- truncate_inode_page(mapping, page);
+ if (!trylock_page(page))
+ continue;
+ WARN_ON(page->index != index);
+ if (PageWriteback(page)) {
unlock_page(page);
+ continue;
}
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- cond_resched();
- index++;
+ truncate_inode_page(mapping, page);
+ unlock_page(page);
}
-
+do_partial:
if (partial_start) {
struct page *page = find_lock_page(mapping, start - 1);
if (page) {
@@ -334,34 +319,12 @@ void truncate_inode_pages_range(struct address_space *mapping,
if (start >= end)
return;
- index = start;
- for ( ; ; ) {
- cond_resched();
- if (!pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
- /* If all gone from start onwards, we're done */
- if (index == start)
- break;
- /* Otherwise restart to make sure all gone */
- index = start;
- continue;
- }
- if (index == start && indices[0] >= end) {
- /* All gone out of hole to be punched, we're done */
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- break;
- }
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
-
- /* We rely upon deletion not changing page->index */
- index = indices[i];
- if (index >= end) {
- /* Restart punch to make sure all gone */
- index = start - 1;
- break;
- }
+ do {
+ found = false;
+
+ for_each_pagecache_entry(&iter, mapping, start,
+ end - 1, page, index) {
+ found = true;
if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page);
@@ -374,10 +337,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
truncate_inode_page(mapping, page);
unlock_page(page);
}
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- index++;
- }
+ } while (found);
+
cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -463,48 +424,32 @@ EXPORT_SYMBOL(truncate_inode_pages_final);
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
- pgoff_t indices[PAGEVEC_SIZE];
- struct pagevec pvec;
- pgoff_t index = start;
+ struct pagecache_iter iter;
+ struct page *page;
+ pgoff_t index;
unsigned long ret;
unsigned long count = 0;
- int i;
-
- pagevec_init(&pvec, 0);
- while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
- indices)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
- /* We rely upon deletion not changing page->index */
- index = indices[i];
- if (index > end)
- break;
-
- if (radix_tree_exceptional_entry(page)) {
- clear_exceptional_entry(mapping, index, page);
- continue;
- }
-
- if (!trylock_page(page))
- continue;
- WARN_ON(page->index != index);
- ret = invalidate_inode_page(page);
- unlock_page(page);
- /*
- * Invalidation is a hint that the page is no longer
- * of interest and try to speed up its reclaim.
- */
- if (!ret)
- deactivate_file_page(page);
- count += ret;
+ for_each_pagecache_entry(&iter, mapping, start, end, page, index) {
+ if (radix_tree_exceptional_entry(page)) {
+ clear_exceptional_entry(mapping, index, page);
+ continue;
}
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- cond_resched();
- index++;
+
+ if (!trylock_page(page))
+ continue;
+ WARN_ON(page->index != index);
+ ret = invalidate_inode_page(page);
+ unlock_page(page);
+ /*
+ * Invalidation is a hint that the page is no longer
+ * of interest and try to speed up its reclaim.
+ */
+ if (!ret)
+ deactivate_file_page(page);
+ count += ret;
}
+
return count;
}
EXPORT_SYMBOL(invalidate_mapping_pages);
@@ -568,75 +513,59 @@ static int do_launder_page(struct address_space *mapping, struct page *page)
int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
- pgoff_t indices[PAGEVEC_SIZE];
- struct pagevec pvec;
+ struct pagecache_iter iter;
+ struct page *page;
pgoff_t index;
- int i;
int ret = 0;
int ret2 = 0;
int did_range_unmap = 0;
cleancache_invalidate_inode(mapping);
- pagevec_init(&pvec, 0);
- index = start;
- while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
- indices)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
-
- /* We rely upon deletion not changing page->index */
- index = indices[i];
- if (index > end)
- break;
- if (radix_tree_exceptional_entry(page)) {
- clear_exceptional_entry(mapping, index, page);
- continue;
- }
+ for_each_pagecache_entry(&iter, mapping, start, end, page, index) {
+ if (radix_tree_exceptional_entry(page)) {
+ clear_exceptional_entry(mapping, index, page);
+ continue;
+ }
- lock_page(page);
- WARN_ON(page->index != index);
- if (page->mapping != mapping) {
- unlock_page(page);
- continue;
- }
- wait_on_page_writeback(page);
- if (page_mapped(page)) {
- if (!did_range_unmap) {
- /*
- * Zap the rest of the file in one hit.
- */
- unmap_mapping_range(mapping,
- (loff_t)index << PAGE_CACHE_SHIFT,
- (loff_t)(1 + end - index)
- << PAGE_CACHE_SHIFT,
- 0);
- did_range_unmap = 1;
- } else {
- /*
- * Just zap this page
- */
- unmap_mapping_range(mapping,
- (loff_t)index << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE, 0);
- }
- }
- BUG_ON(page_mapped(page));
- ret2 = do_launder_page(mapping, page);
- if (ret2 == 0) {
- if (!invalidate_complete_page2(mapping, page))
- ret2 = -EBUSY;
- }
- if (ret2 < 0)
- ret = ret2;
+ lock_page(page);
+ WARN_ON(page->index != index);
+ if (page->mapping != mapping) {
unlock_page(page);
+ continue;
}
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- cond_resched();
- index++;
+ wait_on_page_writeback(page);
+ if (page_mapped(page)) {
+ if (!did_range_unmap) {
+ /*
+ * Zap the rest of the file in one hit.
+ */
+ unmap_mapping_range(mapping,
+ (loff_t)index << PAGE_CACHE_SHIFT,
+ (loff_t)(1 + end - index)
+ << PAGE_CACHE_SHIFT,
+ 0);
+ did_range_unmap = 1;
+ } else {
+ /*
+ * Just zap this page
+ */
+ unmap_mapping_range(mapping,
+ (loff_t)index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, 0);
+ }
+ }
+ BUG_ON(page_mapped(page));
+ ret2 = do_launder_page(mapping, page);
+ if (ret2 == 0) {
+ if (!invalidate_complete_page2(mapping, page))
+ ret2 = -EBUSY;
+ }
+ if (ret2 < 0)
+ ret = ret2;
+ unlock_page(page);
}
+
cleancache_invalidate_inode(mapping);
return ret;
}
--
2.8.0.rc3