Re: [PATCH v9 6/7] apei/ghes: Use xchg_release() for updating new cache slot instead of cmpxchg()
From: Ard Biesheuvel
Date: Mon Oct 17 2022 - 10:39:50 EST
On Mon, 17 Oct 2022 at 15:02, Jia He <justin.he@xxxxxxx> wrote:
>
> From: Ard Biesheuvel <ardb@xxxxxxxxxx>
>
> From: Ard Biesheuvel <ardb@xxxxxxxxxx>
>
> ghes_estatus_cache_add() selects a slot, and either succeeds in
> replacing its contents with a pointer to a new cached item, or it just
> gives up and frees the new item again, without attempting to select
> another slot even if one might be available.
>
> Since only inserting new items is needed, the race can only cause a failure
> if the selected slot was updated with another new item concurrently,
> which means that it is arbitrary which of those two items gets
> dropped. This means the cmpxchg() and the special case are not necessary,
> and hence just drop the existing item unconditionally. Note that this
> does not result in loss of error events, it simply means we might
> cause a false cache miss, and report the same event one additional
> time in quick succession even if the cache should have prevented that.
>
> Move the xchg_release() and call_rcu out of rcu_read_lock/unlock section
> since there is no actually dereferencing the pointer at all.
>
> Co-developed-by: Jia He <justin.he@xxxxxxx>
> Signed-off-by: Jia He <justin.he@xxxxxxx>
> Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx>
> ---
> drivers/acpi/apei/ghes.c | 47 +++++++++++++++++++++-------------------
> 1 file changed, 25 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 27c72b175e4b..5d7754053ca0 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -150,7 +150,7 @@ struct ghes_vendor_record_entry {
> static struct gen_pool *ghes_estatus_pool;
> static unsigned long ghes_estatus_pool_size_request;
>
> -static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
> +static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
> static atomic_t ghes_estatus_cache_alloced;
>
> static int ghes_panic_timeout __read_mostly = 30;
> @@ -785,31 +785,26 @@ static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
> return cache;
> }
>
> -static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
> +static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
> {
> + struct ghes_estatus_cache *cache;
> u32 len;
>
> + cache = container_of(head, struct ghes_estatus_cache, rcu);
> len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
> len = GHES_ESTATUS_CACHE_LEN(len);
> gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
> atomic_dec(&ghes_estatus_cache_alloced);
> }
>
> -static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
> -{
> - struct ghes_estatus_cache *cache;
> -
> - cache = container_of(head, struct ghes_estatus_cache, rcu);
> - ghes_estatus_cache_free(cache);
> -}
> -
> static void ghes_estatus_cache_add(
> struct acpi_hest_generic *generic,
> struct acpi_hest_generic_status *estatus)
> {
> int i, slot = -1, count;
> unsigned long long now, duration, period, max_period = 0;
> - struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
> + struct ghes_estatus_cache *cache, *new_cache;
> + struct ghes_estatus_cache __rcu *victim;
>
> new_cache = ghes_estatus_cache_alloc(generic, estatus);
> if (new_cache == NULL)
> @@ -820,13 +815,11 @@ static void ghes_estatus_cache_add(
> cache = rcu_dereference(ghes_estatus_caches[i]);
> if (cache == NULL) {
> slot = i;
> - slot_cache = NULL;
> break;
> }
> duration = now - cache->time_in;
> if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
> slot = i;
> - slot_cache = cache;
> break;
> }
> count = atomic_read(&cache->count);
> @@ -835,18 +828,28 @@ static void ghes_estatus_cache_add(
> if (period > max_period) {
> max_period = period;
> slot = i;
> - slot_cache = cache;
> }
> }
> - /* new_cache must be put into array after its contents are written */
> - smp_wmb();
> - if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
> - slot_cache, new_cache) == slot_cache) {
> - if (slot_cache)
> - call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
> - } else
> - ghes_estatus_cache_free(new_cache);
> rcu_read_unlock();
> +
> + if (slot != -1) {
> + /*
> + * Use release semantics to ensure that ghes_estatus_cached()
> + * running on another CPU will see the updated cache fields if
> + * it can see the new value of the pointer.
> + */
> + victim = xchg_release(&ghes_estatus_caches[slot], new_cache);
> +
This still lacks the RCU_INITIALIZER()
> + /*
> + * At this point, victim may point to a cached item different
> + * from the one based on which we selected the slot. Instead of
> + * going to the loop again to pick another slot, let's just
> + * drop the other item anyway: this may cause a false cache
> + * miss later on, but that won't cause any problems.
> + */
> + if (victim)
> + call_rcu(unrcu_pointer(&victim->rcu), ghes_estatus_cache_rcu_free);
Please use &unrcu_pointer(victim)->rcu here.
> + }
> }
>
> static void __ghes_panic(struct ghes *ghes,
> --
> 2.25.1
>