Re: [PATCH v4 05/13] iov: Move iterator functions to a header file

From: David Howells
Date: Fri Sep 15 2023 - 05:39:35 EST


David Laight <David.Laight@xxxxxxxxxx> wrote:

> > Move the iterator functions to a header file so that other operations that
> > need to scan over an iterator can be added. For instance, the rbd driver
> > could use this to scan a buffer to see if it is all zeros and libceph could
> > use this to generate a crc.
>
> These all look a bit big for being more generally inlined.
>
> I know you want to avoid the indirect call in the normal cases,
> but maybe it would be ok for other uses?

So you'd advocate for something like:

size_t generic_iterate(struct iov_iter *iter, size_t len, void *priv,
void *priv2, iov_ustep_f ustep, iov_step_f step)
{
return iterate_and_advance2(iter, len, priv, priv2,
ustep, step);
}
EXPORT_SYMBOL(generic_iterate);

in lib/iov_iter.c and then call that from the places that want to use it?

I tried benchmarking that (see attached patch - it needs to go on top of my
iov patches). I ran the insmod three times, then filtered out and sorted the
benchmark results:

iov_kunit_benchmark_bvec: avg 3174 uS, stddev 68 uS
iov_kunit_benchmark_bvec: avg 3176 uS, stddev 61 uS
iov_kunit_benchmark_bvec: avg 3180 uS, stddev 64 uS
iov_kunit_benchmark_bvec_outofline: avg 3678 uS, stddev 4 uS
iov_kunit_benchmark_bvec_outofline: avg 3678 uS, stddev 5 uS
iov_kunit_benchmark_bvec_outofline: avg 3679 uS, stddev 6 uS
iov_kunit_benchmark_xarray: avg 3560 uS, stddev 5 uS
iov_kunit_benchmark_xarray: avg 3560 uS, stddev 6 uS
iov_kunit_benchmark_xarray: avg 3570 uS, stddev 16 uS
iov_kunit_benchmark_xarray_outofline: avg 4125 uS, stddev 13 uS
iov_kunit_benchmark_xarray_outofline: avg 4125 uS, stddev 2 uS
iov_kunit_benchmark_xarray_outofline: avg 4125 uS, stddev 6 uS

The out-of-line variant adds almost 16% overhead compared to the inlined one:

(gdb) p 4125/3560.0
$2 = 1.1587078651685394
(gdb) p 3678/3174.0
$3 = 1.1587901701323251

I'm guessing a lot of that is due to function pointer mitigations.

Now, part of the code size expansion can be mitigated by using, say,
iterate_and_advance_kernel() if you know you aren't going to encounter
user-backed iterators, or even using, say, iterate_bvec() if you know you're
only going to see a specific iterator type.

David
---
iov_iter: Benchmark out of line generic iterator

diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h
index 2ebb86c041b6..8f562e80473b 100644
--- a/include/linux/iov_iter.h
+++ b/include/linux/iov_iter.h
@@ -293,4 +293,7 @@ size_t iterate_and_advance_kernel(struct iov_iter *iter, size_t len, void *priv,
return progress;
}

+size_t generic_iterate(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+ iov_ustep_f ustep, iov_step_f step);
+
#endif /* _LINUX_IOV_ITER_H */
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 8f7a10c4a295..f9643dd02676 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1684,3 +1684,10 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i,
return -EFAULT;
}
EXPORT_SYMBOL_GPL(iov_iter_extract_pages);
+
+size_t generic_iterate(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+ iov_ustep_f ustep, iov_step_f step)
+{
+ return iterate_and_advance2(iter, len, priv, priv2, ustep, step);
+}
+EXPORT_SYMBOL(generic_iterate);
diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c
index cc9c64663a73..f208516a68c9 100644
--- a/lib/kunit_iov_iter.c
+++ b/lib/kunit_iov_iter.c
@@ -18,6 +18,7 @@
#include <linux/writeback.h>
#include <linux/uio.h>
#include <linux/bvec.h>
+#include <linux/iov_iter.h>
#include <kunit/test.h>

MODULE_DESCRIPTION("iov_iter testing");
@@ -1571,6 +1572,124 @@ static void __init iov_kunit_benchmark_xarray(struct kunit *test)
KUNIT_SUCCEED();
}

+static noinline
+size_t shovel_to_user_iter(void __user *iter_to, size_t progress,
+ size_t len, void *from, void *priv2)
+{
+ if (should_fail_usercopy())
+ return len;
+ if (access_ok(iter_to, len)) {
+ from += progress;
+ instrument_copy_to_user(iter_to, from, len);
+ len = raw_copy_to_user(iter_to, from, len);
+ }
+ return len;
+}
+
+static noinline
+size_t shovel_to_kernel_iter(void *iter_to, size_t progress,
+ size_t len, void *from, void *priv2)
+{
+ memcpy(iter_to, from + progress, len);
+ return 0;
+}
+
+/*
+ * Time copying 256MiB through an ITER_BVEC with an out-of-line copier
+ * function.
+ */
+static void __init iov_kunit_benchmark_bvec_outofline(struct kunit *test)
+{
+ struct iov_iter iter;
+ struct bio_vec *bvec;
+ struct page *page;
+ unsigned int samples[IOV_KUNIT_NR_SAMPLES];
+ ktime_t a, b;
+ ssize_t copied;
+ size_t size = 256 * 1024 * 1024, npages = size / PAGE_SIZE;
+ void *scratch;
+ int i;
+
+ /* Allocate a page and tile it repeatedly in the buffer. */
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+ kunit_add_action_or_reset(test, iov_kunit_free_page, page);
+
+ bvec = kunit_kmalloc_array(test, npages, sizeof(bvec[0]), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, bvec);
+ for (i = 0; i < npages; i++)
+ bvec_set_page(&bvec[i], page, PAGE_SIZE, 0);
+
+ /* Create a single large buffer to copy to/from. */
+ scratch = iov_kunit_create_source(test, npages);
+
+ /* Perform and time a bunch of copies. */
+ kunit_info(test, "Benchmarking copy_to_iter() over BVEC:\n");
+ for (i = 0; i < IOV_KUNIT_NR_SAMPLES; i++) {
+ iov_iter_bvec(&iter, ITER_DEST, bvec, npages, size);
+ a = ktime_get_real();
+ copied = generic_iterate(&iter, size, scratch, NULL,
+ shovel_to_user_iter,
+ shovel_to_kernel_iter);
+ b = ktime_get_real();
+ KUNIT_EXPECT_EQ(test, copied, size);
+ samples[i] = ktime_to_us(ktime_sub(b, a));
+ }
+
+ iov_kunit_benchmark_print_stats(test, samples);
+ KUNIT_SUCCEED();
+}
+
+/*
+ * Time copying 256MiB through an ITER_XARRAY with an out-of-line copier
+ * function.
+ */
+static void __init iov_kunit_benchmark_xarray_outofline(struct kunit *test)
+{
+ struct iov_iter iter;
+ struct xarray *xarray;
+ struct page *page;
+ unsigned int samples[IOV_KUNIT_NR_SAMPLES];
+ ktime_t a, b;
+ ssize_t copied;
+ size_t size = 256 * 1024 * 1024, npages = size / PAGE_SIZE;
+ void *scratch;
+ int i;
+
+ /* Allocate a page and tile it repeatedly in the buffer. */
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+ kunit_add_action_or_reset(test, iov_kunit_free_page, page);
+
+ xarray = iov_kunit_create_xarray(test);
+
+ for (i = 0; i < npages; i++) {
+ void *x = xa_store(xarray, i, page, GFP_KERNEL);
+
+ KUNIT_ASSERT_FALSE(test, xa_is_err(x));
+ }
+
+ /* Create a single large buffer to copy to/from. */
+ scratch = iov_kunit_create_source(test, npages);
+
+ /* Perform and time a bunch of copies. */
+ kunit_info(test, "Benchmarking copy_to_iter() over XARRAY:\n");
+ for (i = 0; i < IOV_KUNIT_NR_SAMPLES; i++) {
+ iov_iter_xarray(&iter, ITER_DEST, xarray, 0, size);
+ a = ktime_get_real();
+
+ copied = generic_iterate(&iter, size, scratch, NULL,
+ shovel_to_user_iter,
+ shovel_to_kernel_iter);
+ b = ktime_get_real();
+ KUNIT_EXPECT_EQ(test, copied, size);
+ samples[i] = ktime_to_us(ktime_sub(b, a));
+ }
+
+ iov_kunit_benchmark_print_stats(test, samples);
+ KUNIT_SUCCEED();
+}
+
static struct kunit_case __refdata iov_kunit_cases[] = {
KUNIT_CASE(iov_kunit_copy_to_ubuf),
KUNIT_CASE(iov_kunit_copy_from_ubuf),
@@ -1593,6 +1712,8 @@ static struct kunit_case __refdata iov_kunit_cases[] = {
KUNIT_CASE(iov_kunit_benchmark_bvec),
KUNIT_CASE(iov_kunit_benchmark_bvec_split),
KUNIT_CASE(iov_kunit_benchmark_xarray),
+ KUNIT_CASE(iov_kunit_benchmark_bvec_outofline),
+ KUNIT_CASE(iov_kunit_benchmark_xarray_outofline),
{}
};