Re: [PATCH 17/17] orangefs: implement writepages

From: martin
Date: Tue Sep 18 2018 - 17:46:13 EST


On Mon, Sep 17, 2018 at 08:10:54PM +0000, Martin Brandenburg wrote:
> Go through pages and look for a consecutive writable region. After
> finding 128 consecutive writable pages or when finding a non-consecutive
> region, do the write.

128 was chosen arbitrarily. I tested a number of higher counts, but saw
no appreciable improvement, and let 128 stand.

Mike tells me that there is an improvement when the count is such that
the length of the write would be equal to the maximum length of I/O with
the client-core (on a 4096 page size machine, this is 1024).

When I tested, struct orangefs_writepages was on the stack, and I
couldn't go that high without an overflow.

He asked me, and I don't know the answer, so I repeat it here, whether
"Would changing out the kalloc [really kcalloc] in
orangefs_writepages_work to be based on kmem_cache_alloc pre-allocations
be a good thing?"

I wonder the same thing about the kzalloc in orangefs_writepages. I will
change struct orangefs_writepages to include the struct bio_vec in
orangefs_writepages_work so we don't have to do two memory allocations.

>
> Signed-off-by: Martin Brandenburg <martin@xxxxxxxxxxxx>
> ---
> fs/orangefs/inode.c | 135 +++++++++++++++++++++++++++++++++-
> fs/orangefs/orangefs-kernel.h | 1 +
> fs/orangefs/super.c | 1 +
> 3 files changed, 135 insertions(+), 2 deletions(-)
>
> diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
> index 178734920e45..34b98d2ed377 100644
> --- a/fs/orangefs/inode.c
> +++ b/fs/orangefs/inode.c
> @@ -15,6 +15,8 @@
> #include "orangefs-kernel.h"
> #include "orangefs-bufmap.h"
>
> +#define ORANGEFS_WRITEPAGES_COUNT 128
> +
> static int orangefs_writepage_locked(struct page *page,
> struct writeback_control *wbc)
> {
> @@ -44,10 +46,10 @@ static int orangefs_writepage_locked(struct page *page,
> len = i_size_read(inode);
> }
> } else {
> -/* BUG();*/
> + end_page_writeback(page);
> /* It's not private so there's nothing to write, right? */
> printk("writepage not private!\n");
> - end_page_writeback(page);
> + BUG();
> return 0;
>
> }
> @@ -230,6 +232,134 @@ static int orangefs_readpage(struct file *file, struct page *page)
> return ret;
> }
>
> +struct orangefs_writepages {
> + loff_t off;
> + size_t len;
> + struct page *pages[ORANGEFS_WRITEPAGES_COUNT];
> + int npages;
> +};
> +
> +static int orangefs_writepages_work(struct orangefs_writepages *ow,
> + struct writeback_control *wbc)
> +{
> + struct inode *inode = ow->pages[0]->mapping->host;
> + struct orangefs_write_request *wr;
> + struct iov_iter iter;
> + struct bio_vec *bv;
> + ssize_t ret;
> + loff_t off;
> + int i;
> +
> + bv = kcalloc(ORANGEFS_WRITEPAGES_COUNT, sizeof(struct bio_vec),
> + GFP_KERNEL);
> + if (!bv)
> + return -ENOMEM;
> +
> + for (i = 0; i < ow->npages; i++) {
> + set_page_writeback(ow->pages[i]);
> + bv[i].bv_page = ow->pages[i];
> + /* uh except the last one maybe... */
> + if (i == ow->npages - 1 && ow->len % PAGE_SIZE)
> + bv[i].bv_len = ow->len % PAGE_SIZE;
> + else
> + bv[i].bv_len = PAGE_SIZE;
> + bv[i].bv_offset = 0;
> + }
> + iov_iter_bvec(&iter, ITER_BVEC | WRITE, bv, ow->npages, ow->len);
> +
> + off = ow->off;
> + ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, ow->len,
> + 0, NULL);
> + if (ret < 0) {
> + for (i = 0; i < ow->npages; i++) {
> + SetPageError(ow->pages[i]);
> + mapping_set_error(ow->pages[i]->mapping, ret);
> + end_page_writeback(ow->pages[i]);
> + unlock_page(ow->pages[i]);
> + }
> + } else {
> + for (i = 0; i < ow->npages; i++) {
> + if (PagePrivate(ow->pages[i])) {
> + wr = (struct orangefs_write_request *)
> + page_private(ow->pages[i]);
> + ClearPagePrivate(ow->pages[i]);
> + wr_release(wr);
> + }
> + end_page_writeback(ow->pages[i]);
> + unlock_page(ow->pages[i]);
> + }
> + }
> + kfree(bv);
> + return ret;
> +}
> +
> +static int orangefs_writepages_callback(struct page *page,
> + struct writeback_control *wbc, void *data)
> +{
> + struct orangefs_writepages *ow = data;
> + struct orangefs_write_request *wr;
> + int ret;
> +
> + if (!PagePrivate(page)) {
> + unlock_page(page);
> + /* It's not private so there's nothing to write, right? */
> + printk("writepages_callback not private!\n");
> + BUG();
> + return 0;
> + }
> + wr = (struct orangefs_write_request *)page_private(page);
> +
> + if (wr->len != PAGE_SIZE) {
> + ret = orangefs_writepage_locked(page, wbc);
> + mapping_set_error(page->mapping, ret);
> + unlock_page(page);
> + } else {
> + ret = -1;
> + if (ow->npages == 0) {
> + ow->off = wr->pos;
> + ow->len = wr->len;
> + ow->pages[ow->npages++] = page;
> + ret = 0;
> + }
> + if (ow->off + ow->len == wr->pos) {
> + ow->len += wr->len;
> + ow->pages[ow->npages++] = page;
> + ret = 0;
> + }
> + if (ret == -1) {
> + ret = orangefs_writepage_locked(page, wbc);
> + mapping_set_error(page->mapping, ret);
> + unlock_page(page);
> + } else {
> + if (ow->npages == ORANGEFS_WRITEPAGES_COUNT) {
> + orangefs_writepages_work(ow, wbc);
> + memset(ow, 0, sizeof *ow);
> + }
> + }
> + }
> + return ret;
> +}
> +
> +static int orangefs_writepages(struct address_space *mapping,
> + struct writeback_control *wbc)
> +{
> + struct orangefs_writepages *ow;
> + struct blk_plug plug;
> + int ret;
> + ow = kzalloc(sizeof(struct orangefs_writepages), GFP_KERNEL);
> + if (!ow)
> + return -ENOMEM;
> + mutex_lock(&ORANGEFS_SB(mapping->host->i_sb)->writepages_mutex);
> + blk_start_plug(&plug);
> + ret = write_cache_pages(mapping, wbc, orangefs_writepages_callback, ow);
> + if (ow->npages)
> + ret = orangefs_writepages_work(ow, wbc);
> + blk_finish_plug(&plug);
> + mutex_unlock(&ORANGEFS_SB(mapping->host->i_sb)->writepages_mutex);
> + kfree(ow);
> + return ret;
> +}
> +
> static int orangefs_write_begin(struct file *file,
> struct address_space *mapping, loff_t pos, unsigned len, unsigned flags,
> struct page **pagep, void **fsdata)
> @@ -325,6 +455,7 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
> static const struct address_space_operations orangefs_address_operations = {
> .writepage = orangefs_writepage,
> .readpage = orangefs_readpage,
> + .writepages = orangefs_writepages,
> .set_page_dirty = __set_page_dirty_nobuffers,
> .write_begin = orangefs_write_begin,
> .write_end = orangefs_write_end,
> diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
> index 256851bab7a5..9e23f97fb5cc 100644
> --- a/fs/orangefs/orangefs-kernel.h
> +++ b/fs/orangefs/orangefs-kernel.h
> @@ -220,6 +220,7 @@ struct orangefs_sb_info_s {
> int mount_pending;
> int no_list;
> struct list_head list;
> + struct mutex writepages_mutex;
> };
>
> struct orangefs_stats {
> diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
> index 83abe5ec2d11..204e1ac7f228 100644
> --- a/fs/orangefs/super.c
> +++ b/fs/orangefs/super.c
> @@ -467,6 +467,7 @@ static int orangefs_fill_sb(struct super_block *sb,
>
> sb->s_export_op = &orangefs_export_ops;
> sb->s_root = root_dentry;
> + mutex_init(&ORANGEFS_SB(sb)->writepages_mutex);
> return 0;
> }
>
> --
> 2.19.0
>