Re: [PATCH] random: always use batched entropy for get_random_u{32,64}

From: Luck, Tony
Date: Mon Feb 24 2020 - 15:41:13 EST


On Fri, Feb 21, 2020 at 07:41:33PM -0500, Theodore Y. Ts'o wrote:
> On Fri, Feb 21, 2020 at 09:08:19PM +0100, Jason A. Donenfeld wrote:
> > On Thu, Feb 20, 2020 at 11:29 PM Tony Luck <tony.luck@xxxxxxxxx> wrote:
> > >
> > > Could we just disable interrupts and pre-emption around the entropy extraction?
> >
> > Probably, yes... We can address this in a separate patch.
>
> No, we can't; take a look at invalidate_batched_entropy(), where we
> need to invalidate all of the per-cpu batched entropy from a single CPU
> after we have initialized the CRNG.
>
> Since most of the time after CRNG initialization, the spinlock for
> each CPU will be on that CPU's cacheline, the time to take and release
> the spinlock is not going to be material.

So we could get rid of the spin lock by replacing it with a "bool"
that is written when we want to do an invalidate on the next call
(where it is read and cleared).

For me it makes a 15 cycle difference (56 vs. 71) for the fast
case when we are just picking a value from the array. The slow
path when we do extract_crng() is barely changed (731 vs 736 cycles).

But I took the "do lazily" comment above invalidate_batched_entropy()
very literally and didn't add any fences to make sure that readers
of need_invalidate see the store ASAP. So a close race where the
invalidate request would have won control of the spin lock might
not get processed until a subsequent call.

If you think a fence is needed, then the advantage will be lost
and the below patch is worthless.

-Tony

diff --git a/drivers/char/random.c b/drivers/char/random.c
index a6b77a850ddd..6fb222996ea4 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2144,7 +2144,7 @@ struct batched_entropy {
u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
};
unsigned int position;
- spinlock_t batch_lock;
+ bool need_invalidate;
};

/*
@@ -2155,9 +2155,7 @@ struct batched_entropy {
* wait_for_random_bytes() should be called and return 0 at least once at any
* point prior.
*/
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
- .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock),
-};
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);

u64 get_random_u64(void)
{
@@ -2168,21 +2166,23 @@ u64 get_random_u64(void)

warn_unseeded_randomness(&previous);

+ local_irq_save(flags);
+ preempt_disable();
batch = raw_cpu_ptr(&batched_entropy_u64);
- spin_lock_irqsave(&batch->batch_lock, flags);
- if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+ if (batch->need_invalidate ||
+ batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
extract_crng((u8 *)batch->entropy_u64);
batch->position = 0;
+ batch->need_invalidate = false;
}
ret = batch->entropy_u64[batch->position++];
- spin_unlock_irqrestore(&batch->batch_lock, flags);
+ preempt_enable();
+ local_irq_restore(flags);
return ret;
}
EXPORT_SYMBOL(get_random_u64);

-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
- .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock),
-};
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
u32 get_random_u32(void)
{
u32 ret;
@@ -2192,14 +2192,18 @@ u32 get_random_u32(void)

warn_unseeded_randomness(&previous);

+ local_irq_save(flags);
+ preempt_disable();
batch = raw_cpu_ptr(&batched_entropy_u32);
- spin_lock_irqsave(&batch->batch_lock, flags);
- if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+ if (batch->need_invalidate ||
+ batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
extract_crng((u8 *)batch->entropy_u32);
batch->position = 0;
+ batch->need_invalidate = false;
}
ret = batch->entropy_u32[batch->position++];
- spin_unlock_irqrestore(&batch->batch_lock, flags);
+ preempt_enable();
+ local_irq_restore(flags);
return ret;
}
EXPORT_SYMBOL(get_random_u32);
@@ -2217,14 +2221,10 @@ static void invalidate_batched_entropy(void)
struct batched_entropy *batched_entropy;

batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu);
- spin_lock_irqsave(&batched_entropy->batch_lock, flags);
- batched_entropy->position = 0;
- spin_unlock(&batched_entropy->batch_lock);
+ batched_entropy->need_invalidate = true;

batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu);
- spin_lock(&batched_entropy->batch_lock);
- batched_entropy->position = 0;
- spin_unlock_irqrestore(&batched_entropy->batch_lock, flags);
+ batched_entropy->need_invalidate = true;
}
}