Re: [PATCH v3 19/19] fs: handle inode->i_version more efficiently
From: Jan Kara
Date: Thu Dec 21 2017 - 06:48:13 EST
On Thu 21-12-17 06:25:55, Jeff Layton wrote:
> Got it, I think. How about this (sorry for the unrelated deltas here):
>
> [PATCH] SQUASH: add memory barriers around i_version accesses
Yep, this looks good to me.
Honza
>
> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> ---
> include/linux/iversion.h | 60 +++++++++++++++++++++++++++++++-----------------
> 1 file changed, 39 insertions(+), 21 deletions(-)
>
> diff --git a/include/linux/iversion.h b/include/linux/iversion.h
> index a9fbf99709df..1b3b5ed7c5b8 100644
> --- a/include/linux/iversion.h
> +++ b/include/linux/iversion.h
> @@ -89,6 +89,23 @@ inode_set_iversion_raw(struct inode *inode, const u64 val)
> atomic64_set(&inode->i_version, val);
> }
>
> +/**
> + * inode_peek_iversion_raw - grab a "raw" iversion value
> + * @inode: inode from which i_version should be read
> + *
> + * Grab a "raw" inode->i_version value and return it. The i_version is not
> + * flagged or converted in any way. This is mostly used to access a self-managed
> + * i_version.
> + *
> + * With those filesystems, we want to treat the i_version as an entirely
> + * opaque value.
> + */
> +static inline u64
> +inode_peek_iversion_raw(const struct inode *inode)
> +{
> + return atomic64_read(&inode->i_version);
> +}
> +
> /**
> * inode_set_iversion - set i_version to a particular value
> * @inode: inode to set
> @@ -152,7 +169,18 @@ inode_maybe_inc_iversion(struct inode *inode, bool force)
> {
> u64 cur, old, new;
>
> - cur = (u64)atomic64_read(&inode->i_version);
> + /*
> + * The i_version field is not strictly ordered with any other inode
> + * information, but the legacy inode_inc_iversion code used a spinlock
> + * to serialize increments.
> + *
> + * Here, we add full memory barriers to ensure that any de-facto
> + * ordering with other info is preserved.
> + *
> + * This barrier pairs with the barrier in inode_query_iversion()
> + */
> + smp_mb();
> + cur = inode_peek_iversion_raw(inode);
> for (;;) {
> /* If flag is clear then we needn't do anything */
> if (!force && !(cur & I_VERSION_QUERIED))
> @@ -183,23 +211,6 @@ inode_inc_iversion(struct inode *inode)
> inode_maybe_inc_iversion(inode, true);
> }
>
> -/**
> - * inode_peek_iversion_raw - grab a "raw" iversion value
> - * @inode: inode from which i_version should be read
> - *
> - * Grab a "raw" inode->i_version value and return it. The i_version is not
> - * flagged or converted in any way. This is mostly used to access a self-managed
> - * i_version.
> - *
> - * With those filesystems, we want to treat the i_version as an entirely
> - * opaque value.
> - */
> -static inline u64
> -inode_peek_iversion_raw(const struct inode *inode)
> -{
> - return atomic64_read(&inode->i_version);
> -}
> -
> /**
> * inode_iversion_need_inc - is the i_version in need of being incremented?
> * @inode: inode to check
> @@ -248,15 +259,22 @@ inode_query_iversion(struct inode *inode)
> {
> u64 cur, old, new;
>
> - cur = atomic64_read(&inode->i_version);
> + cur = inode_peek_iversion_raw(inode);
> for (;;) {
> /* If flag is already set, then no need to swap */
> - if (cur & I_VERSION_QUERIED)
> + if (cur & I_VERSION_QUERIED) {
> + /*
> + * This barrier (and the implicit barrier in the
> + * cmpxchg below) pairs with the barrier in
> + * inode_maybe_inc_iversion().
> + */
> + smp_mb();
> break;
> + }
>
> new = cur | I_VERSION_QUERIED;
> old = atomic64_cmpxchg(&inode->i_version, cur, new);
> - if (old == cur)
> + if (likely(old == cur))
> break;
> cur = old;
> }
> --
> 2.14.3
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR