Re: [PATCH] block: optimise bvec_iter_advance()

From: Arvind Sankar
Date: Fri Nov 29 2019 - 17:17:24 EST


On Fri, Nov 29, 2019 at 12:04:37AM +0300, Pavel Begunkov wrote:
> bvec_iter_advance() is quite popular, but compilers fail to do proper
> alias analysis and optimise it good enough. The assembly is checked
> for gcc 9.2, x86-64.
>
> - remove @iter->bi_size from min(...), as it's always less than @bytes.
> Modify at the beginning and forget about it.
>
> - the compiler isn't able to collapse memory dependencies and remove
> writes in the loop. Help it by explicitely using local vars.
>
> Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
> ---
> include/linux/bvec.h | 23 ++++++++++++++---------
> 1 file changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/bvec.h b/include/linux/bvec.h
> index a032f01e928c..7b2f05faae14 100644
> --- a/include/linux/bvec.h
> +++ b/include/linux/bvec.h
> @@ -87,26 +87,31 @@ struct bvec_iter_all {
> static inline bool bvec_iter_advance(const struct bio_vec *bv,
> struct bvec_iter *iter, unsigned bytes)
> {
> + unsigned int done = iter->bi_bvec_done;
> + unsigned int idx = iter->bi_idx;
> +
> if (WARN_ONCE(bytes > iter->bi_size,
> "Attempted to advance past end of bvec iter\n")) {
> iter->bi_size = 0;
> return false;
> }
>
> + iter->bi_size -= bytes;
> +
> while (bytes) {
> - const struct bio_vec *cur = bv + iter->bi_idx;
> - unsigned len = min3(bytes, iter->bi_size,
> - cur->bv_len - iter->bi_bvec_done);
> + const struct bio_vec *cur = bv + idx;
> + unsigned int len = min(bytes, cur->bv_len - done);
>
> bytes -= len;
> - iter->bi_size -= len;
> - iter->bi_bvec_done += len;
> -
> - if (iter->bi_bvec_done == cur->bv_len) {
> - iter->bi_bvec_done = 0;
> - iter->bi_idx++;
> + done += len;
> + if (done == cur->bv_len) {
> + idx++;
> + done = 0;
> }
> }
> +
> + iter->bi_idx = idx;
> + iter->bi_bvec_done = done;
> return true;
> }
>
> --
> 2.24.0
>

The loop can be simplified a bit further, as done has to be 0 once we go
beyond the current bio_vec. See below for the simplified version.

I also check if bi_size became zero so we can skip the rest of the
calculations in that case. If we want to preserve the current behavior of
updating iter->bi_idx and iter->bi_bvec_done even if bi_size is going to
become zero, the loop condition should change to

while (bytes && bytes >= cur->bv_len)

to ensure that we don't try to access past the end of the bio_vec array.

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index a032f01e928c..380d188dfbd2 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -87,25 +87,26 @@ struct bvec_iter_all {
static inline bool bvec_iter_advance(const struct bio_vec *bv,
struct bvec_iter *iter, unsigned bytes)
{
+ unsigned int idx = iter->bi_idx;
+ const struct bio_vec *cur = bv + idx;
+
if (WARN_ONCE(bytes > iter->bi_size,
"Attempted to advance past end of bvec iter\n")) {
iter->bi_size = 0;
return false;
}

- while (bytes) {
- const struct bio_vec *cur = bv + iter->bi_idx;
- unsigned len = min3(bytes, iter->bi_size,
- cur->bv_len - iter->bi_bvec_done);
-
- bytes -= len;
- iter->bi_size -= len;
- iter->bi_bvec_done += len;
-
- if (iter->bi_bvec_done == cur->bv_len) {
- iter->bi_bvec_done = 0;
- iter->bi_idx++;
+ iter->bi_size -= bytes;
+ if (iter->bi_size != 0) {
+ bytes += iter->bi_bvec_done;
+ while (bytes >= cur->bv_len) {
+ bytes -= cur->bv_len;
+ idx++;
+ cur++;
}
+
+ iter->bi_idx = idx;
+ iter->bi_bvec_done = bytes;
}
return true;
}