Re: random: Benchamrking fast_mix2
From: Theodore Ts'o
Date: Sat Jun 14 2014 - 02:25:28 EST
OK, so I normally do my testing using 32-bit kernels under KVM. On a
i386 kernel, running under kvm (again using a Lenovo T540 with a
i7-4900MQ CPU), with the original fast_mix, I get a timestamp count of
166 cycles using a weighted smoothed average. Using your fast_mix2 I
get around 450 cycles.
I'll try a 64-bit kernel build later this weekend.
Want to give this patch a try on your collection of machines?
- Ted
P.S. ADD_INTERRUPT_BENCH works only on x86 architectures because
random_get_entropy() mapes to get_cycles() which is RDTSC. It won't
work on many other architectures, which don't have a timestamp
counter.
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 4bb6e37..90a1c9f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -267,6 +267,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/random.h>
+#define ADD_INTERRUPT_BENCH
+
/*
* Configuration information
*/
@@ -553,6 +555,7 @@ struct fast_pool {
unsigned char last_timer_intr;
};
+#ifdef ORIG_FAST_MIX
/*
* This is a fast mixing routine used by the interrupt randomness
* collector. It's hardcoded for an 128 bit pool and assumes that any
@@ -579,6 +582,34 @@ static void fast_mix(struct fast_pool *f, __u32 input[4])
f->rotate = input_rotate;
f->count++;
}
+#else
+extern void fast_mix(struct fast_pool *f, __u32 const input[4])
+{
+ __u32 a = f->pool[0] ^ input[0], b = f->pool[1] ^ input[1];
+ __u32 c = f->pool[2] ^ input[2], d = f->pool[3] ^ input[3];
+ int i;
+
+
+ for (i = 0; i < 3; i++) {
+ /*
+ * Inspired by ChaCha's QuarterRound, but
+ * modified for much greater parallelism.
+ * Surprisingly, rotating a and c seems to work
+ * better than b and d. And it runs faster.
+ */
+ a += b; c += d;
+ d ^= a; b ^= c;
+ a = rol32(a, 15); c = rol32(c, 21);
+
+ a += b; c += d;
+ d ^= a; b ^= c;
+ a = rol32(a, 3); c = rol32(c, 7);
+ }
+ f->pool[0] = a; f->pool[1] = b;
+ f->pool[2] = c; f->pool[3] = d;
+ f->count++;
+}
+#endif
/*
* Credit (or debit) the entropy store with n bits of entropy.
@@ -829,6 +860,27 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
+#ifdef ADD_INTERRUPT_BENCH
+static unsigned long avg_cycles;
+
+#define FSHIFT 11 /* nr of bits of precision */
+#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
+#define EXP 2040
+
+static void add_interrupt_bench(cycles_t start)
+{
+ cycles_t duration = random_get_entropy() - start;
+
+ /* use a weighted moving average */
+ avg_cycles *= EXP;
+ avg_cycles += duration * (FIXED_1 - EXP);
+ avg_cycles += 1UL << (FSHIFT - 1);
+ avg_cycles >>= FSHIFT;
+}
+#else
+#define add_interrupt_bench(x)
+#endif
+
void add_interrupt_randomness(int irq, int irq_flags)
{
struct entropy_store *r;
@@ -850,6 +902,7 @@ void add_interrupt_randomness(int irq, int irq_flags)
input[3] = ip >> 32;
fast_mix(fast_pool, input);
+ add_interrupt_bench(cycles);
if ((fast_pool->count & 63) && !time_after(now, fast_pool->last + HZ))
return;
@@ -1644,6 +1697,15 @@ struct ctl_table random_table[] = {
.mode = 0444,
.proc_handler = proc_do_uuid,
},
+#ifdef ADD_INTERRUPT_BENCH
+ {
+ .procname = "add_interrupt_avg_cycles",
+ .data = &avg_cycles,
+ .maxlen = sizeof(avg_cycles),
+ .mode = 0444,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+#endif
{ }
};
#endif /* CONFIG_SYSCTL */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/