Re: [PATCH v4] btrfs: Use larger zlib buffer for s390 hardware compression

From: Zaslonko Mikhail
Date: Wed Jan 08 2020 - 13:48:50 EST


Hello,

On 08.01.2020 16:08, Josef Bacik wrote:
> On 1/8/20 5:51 AM, Mikhail Zaslonko wrote:
>> In order to benefit from s390 zlib hardware compression support,
>> increase the btrfs zlib workspace buffer size from 1 to 4 pages (if
>> s390 zlib hardware support is enabled on the machine). This brings up
>> to 60% better performance in hardware on s390 compared to the PAGE_SIZE
>> buffer and much more compared to the software zlib processing in btrfs.
>> In case of memory pressure, fall back to a single page buffer during
>> workspace allocation.
>> The data compressed with larger input buffers will still conform to zlib
>> standard and thus can be decompressed also on a systems that uses only
>> PAGE_SIZE buffer for btrfs zlib.
>>
>> Signed-off-by: Mikhail Zaslonko <zaslonko@xxxxxxxxxxxxx>
>
>
>> ---
>> Â fs/btrfs/compression.c |ÂÂ 2 +-
>> Â fs/btrfs/zlib.cÂÂÂÂÂÂÂ | 135 ++++++++++++++++++++++++++++++-----------
>> Â 2 files changed, 101 insertions(+), 36 deletions(-)
>>
>> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
>> index ee834ef7beb4..6bd0e75a822c 100644
>> --- a/fs/btrfs/compression.c
>> +++ b/fs/btrfs/compression.c
>> @@ -1285,7 +1285,7 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
>> ÂÂÂÂÂ /* copy bytes from the working buffer into the pages */
>> ÂÂÂÂÂ while (working_bytes > 0) {
>> ÂÂÂÂÂÂÂÂÂ bytes = min_t(unsigned long, bvec.bv_len,
>> -ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ PAGE_SIZE - buf_offset);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ PAGE_SIZE - (buf_offset % PAGE_SIZE));
>> ÂÂÂÂÂÂÂÂÂ bytes = min(bytes, working_bytes);
>> Â ÂÂÂÂÂÂÂÂÂ kaddr = kmap_atomic(bvec.bv_page);
>> diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
>> index a6c90a003c12..05615a1099db 100644
>> --- a/fs/btrfs/zlib.c
>> +++ b/fs/btrfs/zlib.c
>> @@ -20,9 +20,13 @@
>> Â #include <linux/refcount.h>
>> Â #include "compression.h"
>> Â +/* workspace buffer size for s390 zlib hardware support */
>> +#define ZLIB_DFLTCC_BUF_SIZEÂÂÂ (4 * PAGE_SIZE)
>> +
>> Â struct workspace {
>> ÂÂÂÂÂ z_stream strm;
>> ÂÂÂÂÂ char *buf;
>> +ÂÂÂ unsigned int buf_size;
>> ÂÂÂÂÂ struct list_head list;
>> ÂÂÂÂÂ int level;
>> Â };
>> @@ -61,7 +65,21 @@ struct list_head *zlib_alloc_workspace(unsigned int level)
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂ zlib_inflate_workspacesize());
>> ÂÂÂÂÂ workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL);
>> ÂÂÂÂÂ workspace->level = level;
>> -ÂÂÂ workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
>> +ÂÂÂ workspace->buf = NULL;
>> +ÂÂÂ /*
>> +ÂÂÂÂ * In case of s390 zlib hardware support, allocate lager workspace
>> +ÂÂÂÂ * buffer. If allocator fails, fall back to a single page buffer.
>> +ÂÂÂÂ */
>> +ÂÂÂ if (zlib_deflate_dfltcc_enabled()) {
>> +ÂÂÂÂÂÂÂ workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ __GFP_NOMEMALLOC | __GFP_NORETRY |
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ __GFP_NOWARN | GFP_NOIO);
>> +ÂÂÂÂÂÂÂ workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
>> +ÂÂÂ }
>> +ÂÂÂ if (!workspace->buf) {
>> +ÂÂÂÂÂÂÂ workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
>> +ÂÂÂÂÂÂÂ workspace->buf_size = PAGE_SIZE;
>> +ÂÂÂ }
>> ÂÂÂÂÂ if (!workspace->strm.workspace || !workspace->buf)
>> ÂÂÂÂÂÂÂÂÂ goto fail;
>> Â @@ -85,6 +103,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
>> ÂÂÂÂÂ struct page *in_page = NULL;
>> ÂÂÂÂÂ struct page *out_page = NULL;
>> ÂÂÂÂÂ unsigned long bytes_left;
>> +ÂÂÂ unsigned int in_buf_pages;
>> ÂÂÂÂÂ unsigned long len = *total_out;
>> ÂÂÂÂÂ unsigned long nr_dest_pages = *out_pages;
>> ÂÂÂÂÂ const unsigned long max_out = nr_dest_pages * PAGE_SIZE;
>> @@ -102,9 +121,6 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
>> ÂÂÂÂÂ workspace->strm.total_in = 0;
>> ÂÂÂÂÂ workspace->strm.total_out = 0;
>> Â -ÂÂÂ in_page = find_get_page(mapping, start >> PAGE_SHIFT);
>> -ÂÂÂ data_in = kmap(in_page);
>> -
>> ÂÂÂÂÂ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
>> ÂÂÂÂÂ if (out_page == NULL) {
>> ÂÂÂÂÂÂÂÂÂ ret = -ENOMEM;
>> @@ -114,12 +130,51 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
>> ÂÂÂÂÂ pages[0] = out_page;
>> ÂÂÂÂÂ nr_pages = 1;
>> Â -ÂÂÂ workspace->strm.next_in = data_in;
>> +ÂÂÂ workspace->strm.next_in = workspace->buf;
>> +ÂÂÂ workspace->strm.avail_in = 0;
>> ÂÂÂÂÂ workspace->strm.next_out = cpage_out;
>> ÂÂÂÂÂ workspace->strm.avail_out = PAGE_SIZE;
>> -ÂÂÂ workspace->strm.avail_in = min(len, PAGE_SIZE);
>> Â ÂÂÂÂÂ while (workspace->strm.total_in < len) {
>> +ÂÂÂÂÂÂÂ /*
>> +ÂÂÂÂÂÂÂÂ * Get next input pages and copy the contents to
>> +ÂÂÂÂÂÂÂÂ * the workspace buffer if required.
>> +ÂÂÂÂÂÂÂÂ */
>> +ÂÂÂÂÂÂÂ if (workspace->strm.avail_in == 0) {
>> +ÂÂÂÂÂÂÂÂÂÂÂ bytes_left = len - workspace->strm.total_in;
>> +ÂÂÂÂÂÂÂÂÂÂÂ in_buf_pages = min(DIV_ROUND_UP(bytes_left, PAGE_SIZE),
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ workspace->buf_size / PAGE_SIZE);
>> +ÂÂÂÂÂÂÂÂÂÂÂ if (in_buf_pages > 1) {
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ int i;
>> +
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ for (i = 0; i < in_buf_pages; i++) {
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ if (in_page) {
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ kunmap(in_page);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ put_page(in_page);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ }
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ in_page = find_get_page(mapping,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ start >> PAGE_SHIFT);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ data_in = kmap(in_page);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ memcpy(workspace->buf + i * PAGE_SIZE,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ data_in, PAGE_SIZE);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ start += PAGE_SIZE;
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ }
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ workspace->strm.next_in = workspace->buf;
>> +ÂÂÂÂÂÂÂÂÂÂÂ } else {
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ if (in_page) {
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ kunmap(in_page);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ put_page(in_page);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ }
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ in_page = find_get_page(mapping,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ start >> PAGE_SHIFT);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ data_in = kmap(in_page);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ start += PAGE_SIZE;
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ workspace->strm.next_in = data_in;
>> +ÂÂÂÂÂÂÂÂÂÂÂ }
>> +ÂÂÂÂÂÂÂÂÂÂÂ workspace->strm.avail_in = min(bytes_left,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ (unsigned long) workspace->buf_size);
>> +ÂÂÂÂÂÂÂ }
>> +
>> ÂÂÂÂÂÂÂÂÂ ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
>> ÂÂÂÂÂÂÂÂÂ if (ret != Z_OK) {
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂ pr_debug("BTRFS: deflate in loop returned %d\n",
>> @@ -161,33 +216,43 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
>> ÂÂÂÂÂÂÂÂÂ /* we're all done */
>> ÂÂÂÂÂÂÂÂÂ if (workspace->strm.total_in >= len)
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂ break;
>> -
>> -ÂÂÂÂÂÂÂ /* we've read in a full page, get a new one */
>> -ÂÂÂÂÂÂÂ if (workspace->strm.avail_in == 0) {
>> -ÂÂÂÂÂÂÂÂÂÂÂ if (workspace->strm.total_out > max_out)
>> -ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ break;
>> -
>> -ÂÂÂÂÂÂÂÂÂÂÂ bytes_left = len - workspace->strm.total_in;
>> -ÂÂÂÂÂÂÂÂÂÂÂ kunmap(in_page);
>> -ÂÂÂÂÂÂÂÂÂÂÂ put_page(in_page);
>> -
>> -ÂÂÂÂÂÂÂÂÂÂÂ start += PAGE_SIZE;
>> -ÂÂÂÂÂÂÂÂÂÂÂ in_page = find_get_page(mapping,
>> -ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ start >> PAGE_SHIFT);
>> -ÂÂÂÂÂÂÂÂÂÂÂ data_in = kmap(in_page);
>> -ÂÂÂÂÂÂÂÂÂÂÂ workspace->strm.avail_in = min(bytes_left,
>> -ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ PAGE_SIZE);
>> -ÂÂÂÂÂÂÂÂÂÂÂ workspace->strm.next_in = data_in;
>> -ÂÂÂÂÂÂÂ }
>> +ÂÂÂÂÂÂÂ if (workspace->strm.total_out > max_out)
>> +ÂÂÂÂÂÂÂÂÂÂÂ break;
>> ÂÂÂÂÂ }
>> ÂÂÂÂÂ workspace->strm.avail_in = 0;
>> -ÂÂÂ ret = zlib_deflate(&workspace->strm, Z_FINISH);
>> -ÂÂÂ zlib_deflateEnd(&workspace->strm);
>> -
>> -ÂÂÂ if (ret != Z_STREAM_END) {
>> -ÂÂÂÂÂÂÂ ret = -EIO;
>> -ÂÂÂÂÂÂÂ goto out;
>> +ÂÂÂ /*
>> +ÂÂÂÂ * Call deflate with Z_FINISH flush parameter providing more output
>> +ÂÂÂÂ * space but no more input data, until it returns with Z_STREAM_END.
>> +ÂÂÂÂ */
>> +ÂÂÂ while (ret != Z_STREAM_END) {
>> +ÂÂÂÂÂÂÂ ret = zlib_deflate(&workspace->strm, Z_FINISH);
>> +ÂÂÂÂÂÂÂ if (ret == Z_STREAM_END)
>> +ÂÂÂÂÂÂÂÂÂÂÂ break;
>> +ÂÂÂÂÂÂÂ if (ret != Z_OK && ret != Z_BUF_ERROR) {
>> +ÂÂÂÂÂÂÂÂÂÂÂ zlib_deflateEnd(&workspace->strm);
>> +ÂÂÂÂÂÂÂÂÂÂÂ ret = -EIO;
>> +ÂÂÂÂÂÂÂÂÂÂÂ goto out;
>> +ÂÂÂÂÂÂÂ } else if (workspace->strm.avail_out == 0) {
>> +ÂÂÂÂÂÂÂÂÂÂÂ /* get another page for the stream end */
>> +ÂÂÂÂÂÂÂÂÂÂÂ kunmap(out_page);
>> +ÂÂÂÂÂÂÂÂÂÂÂ if (nr_pages == nr_dest_pages) {
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ out_page = NULL;
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ ret = -E2BIG;
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ goto out;
>> +ÂÂÂÂÂÂÂÂÂÂÂ }
>> +ÂÂÂÂÂÂÂÂÂÂÂ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
>> +ÂÂÂÂÂÂÂÂÂÂÂ if (out_page == NULL) {
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ ret = -ENOMEM;
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ goto out;
>> +ÂÂÂÂÂÂÂÂÂÂÂ }
>
> Do we need zlib_deflateEnd() for the above error cases? Thanks,

The original btrfs code did not call zlib_deflateEnd() for -E2BIG and
-ENOMEM cases, so I stick to the same logic.
Unlike userspace zlib where deflateEnd() frees all dynamically allocated
memory, in the kernel it doesn't do much apart from setting the return
code (since all the memory allocations for kernel zlib are performed in advance).

>
> Josef