[PATCH] staging: skein: threefish_block: Use ror64

From: Joe Perches
Date: Fri Mar 18 2016 - 16:32:25 EST


Use the inline instead of direct code to improve readability
and shorten the code a little.

Done with perl:

$ perl -p -i -e 's/\((\w+) \>\> (\d+)\) \| \(\1 \<\< \(64 \- \2\)\)/ror64(\1, \2)/g' drivers/staging/skein/threefish_block.c

Signed-off-by: Joe Perches <joe@xxxxxxxxxxx>
---
drivers/staging/skein/threefish_block.c | 2144 +++++++++++++++----------------
1 file changed, 1072 insertions(+), 1072 deletions(-)

diff --git a/drivers/staging/skein/threefish_block.c b/drivers/staging/skein/threefish_block.c
index e19ac43..a95563f 100644
--- a/drivers/staging/skein/threefish_block.c
+++ b/drivers/staging/skein/threefish_block.c
@@ -512,622 +512,622 @@ void threefish_decrypt_256(struct threefish_key *key_ctx, u64 *input,
b2 -= k0 + t1;
b3 -= k1 + 18;
tmp = b3 ^ b0;
- b3 = (tmp >> 32) | (tmp << (64 - 32));
+ b3 = ror64(tmp, 32);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 32) | (tmp << (64 - 32));
+ b1 = ror64(tmp, 32);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 58) | (tmp << (64 - 58));
+ b1 = ror64(tmp, 58);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 46) | (tmp << (64 - 46));
+ b3 = ror64(tmp, 46);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 12) | (tmp << (64 - 12));
+ b1 = ror64(tmp, 12);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b0 -= b1 + k2;
b1 -= k3 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 33) | (tmp << (64 - 33));
+ b3 = ror64(tmp, 33);
b2 -= b3 + k4 + t0;
b3 -= k0 + 17;

tmp = b3 ^ b0;
- b3 = (tmp >> 5) | (tmp << (64 - 5));
+ b3 = ror64(tmp, 5);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 37) | (tmp << (64 - 37));
+ b1 = ror64(tmp, 37);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 40) | (tmp << (64 - 40));
+ b3 = ror64(tmp, 40);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 52) | (tmp << (64 - 52));
+ b3 = ror64(tmp, 52);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 57) | (tmp << (64 - 57));
+ b1 = ror64(tmp, 57);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 14) | (tmp << (64 - 14));
+ b1 = ror64(tmp, 14);
b0 -= b1 + k1;
b1 -= k2 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b2 -= b3 + k3 + t2;
b3 -= k4 + 16;


tmp = b3 ^ b0;
- b3 = (tmp >> 32) | (tmp << (64 - 32));
+ b3 = ror64(tmp, 32);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 32) | (tmp << (64 - 32));
+ b1 = ror64(tmp, 32);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 58) | (tmp << (64 - 58));
+ b1 = ror64(tmp, 58);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 46) | (tmp << (64 - 46));
+ b3 = ror64(tmp, 46);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 12) | (tmp << (64 - 12));
+ b1 = ror64(tmp, 12);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b0 -= b1 + k0;
b1 -= k1 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 33) | (tmp << (64 - 33));
+ b3 = ror64(tmp, 33);
b2 -= b3 + k2 + t1;
b3 -= k3 + 15;

tmp = b3 ^ b0;
- b3 = (tmp >> 5) | (tmp << (64 - 5));
+ b3 = ror64(tmp, 5);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 37) | (tmp << (64 - 37));
+ b1 = ror64(tmp, 37);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 40) | (tmp << (64 - 40));
+ b3 = ror64(tmp, 40);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 52) | (tmp << (64 - 52));
+ b3 = ror64(tmp, 52);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 57) | (tmp << (64 - 57));
+ b1 = ror64(tmp, 57);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 14) | (tmp << (64 - 14));
+ b1 = ror64(tmp, 14);
b0 -= b1 + k4;
b1 -= k0 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b2 -= b3 + k1 + t0;
b3 -= k2 + 14;


tmp = b3 ^ b0;
- b3 = (tmp >> 32) | (tmp << (64 - 32));
+ b3 = ror64(tmp, 32);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 32) | (tmp << (64 - 32));
+ b1 = ror64(tmp, 32);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 58) | (tmp << (64 - 58));
+ b1 = ror64(tmp, 58);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 46) | (tmp << (64 - 46));
+ b3 = ror64(tmp, 46);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 12) | (tmp << (64 - 12));
+ b1 = ror64(tmp, 12);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b0 -= b1 + k3;
b1 -= k4 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 33) | (tmp << (64 - 33));
+ b3 = ror64(tmp, 33);
b2 -= b3 + k0 + t2;
b3 -= k1 + 13;

tmp = b3 ^ b0;
- b3 = (tmp >> 5) | (tmp << (64 - 5));
+ b3 = ror64(tmp, 5);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 37) | (tmp << (64 - 37));
+ b1 = ror64(tmp, 37);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 40) | (tmp << (64 - 40));
+ b3 = ror64(tmp, 40);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 52) | (tmp << (64 - 52));
+ b3 = ror64(tmp, 52);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 57) | (tmp << (64 - 57));
+ b1 = ror64(tmp, 57);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 14) | (tmp << (64 - 14));
+ b1 = ror64(tmp, 14);
b0 -= b1 + k2;
b1 -= k3 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b2 -= b3 + k4 + t1;
b3 -= k0 + 12;


tmp = b3 ^ b0;
- b3 = (tmp >> 32) | (tmp << (64 - 32));
+ b3 = ror64(tmp, 32);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 32) | (tmp << (64 - 32));
+ b1 = ror64(tmp, 32);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 58) | (tmp << (64 - 58));
+ b1 = ror64(tmp, 58);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 46) | (tmp << (64 - 46));
+ b3 = ror64(tmp, 46);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 12) | (tmp << (64 - 12));
+ b1 = ror64(tmp, 12);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b0 -= b1 + k1;
b1 -= k2 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 33) | (tmp << (64 - 33));
+ b3 = ror64(tmp, 33);
b2 -= b3 + k3 + t0;
b3 -= k4 + 11;

tmp = b3 ^ b0;
- b3 = (tmp >> 5) | (tmp << (64 - 5));
+ b3 = ror64(tmp, 5);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 37) | (tmp << (64 - 37));
+ b1 = ror64(tmp, 37);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 40) | (tmp << (64 - 40));
+ b3 = ror64(tmp, 40);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 52) | (tmp << (64 - 52));
+ b3 = ror64(tmp, 52);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 57) | (tmp << (64 - 57));
+ b1 = ror64(tmp, 57);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 14) | (tmp << (64 - 14));
+ b1 = ror64(tmp, 14);
b0 -= b1 + k0;
b1 -= k1 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b2 -= b3 + k2 + t2;
b3 -= k3 + 10;


tmp = b3 ^ b0;
- b3 = (tmp >> 32) | (tmp << (64 - 32));
+ b3 = ror64(tmp, 32);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 32) | (tmp << (64 - 32));
+ b1 = ror64(tmp, 32);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 58) | (tmp << (64 - 58));
+ b1 = ror64(tmp, 58);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 46) | (tmp << (64 - 46));
+ b3 = ror64(tmp, 46);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 12) | (tmp << (64 - 12));
+ b1 = ror64(tmp, 12);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b0 -= b1 + k4;
b1 -= k0 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 33) | (tmp << (64 - 33));
+ b3 = ror64(tmp, 33);
b2 -= b3 + k1 + t1;
b3 -= k2 + 9;

tmp = b3 ^ b0;
- b3 = (tmp >> 5) | (tmp << (64 - 5));
+ b3 = ror64(tmp, 5);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 37) | (tmp << (64 - 37));
+ b1 = ror64(tmp, 37);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 40) | (tmp << (64 - 40));
+ b3 = ror64(tmp, 40);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 52) | (tmp << (64 - 52));
+ b3 = ror64(tmp, 52);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 57) | (tmp << (64 - 57));
+ b1 = ror64(tmp, 57);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 14) | (tmp << (64 - 14));
+ b1 = ror64(tmp, 14);
b0 -= b1 + k3;
b1 -= k4 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b2 -= b3 + k0 + t0;
b3 -= k1 + 8;


tmp = b3 ^ b0;
- b3 = (tmp >> 32) | (tmp << (64 - 32));
+ b3 = ror64(tmp, 32);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 32) | (tmp << (64 - 32));
+ b1 = ror64(tmp, 32);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 58) | (tmp << (64 - 58));
+ b1 = ror64(tmp, 58);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 46) | (tmp << (64 - 46));
+ b3 = ror64(tmp, 46);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 12) | (tmp << (64 - 12));
+ b1 = ror64(tmp, 12);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b0 -= b1 + k2;
b1 -= k3 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 33) | (tmp << (64 - 33));
+ b3 = ror64(tmp, 33);
b2 -= b3 + k4 + t2;
b3 -= k0 + 7;

tmp = b3 ^ b0;
- b3 = (tmp >> 5) | (tmp << (64 - 5));
+ b3 = ror64(tmp, 5);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 37) | (tmp << (64 - 37));
+ b1 = ror64(tmp, 37);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 40) | (tmp << (64 - 40));
+ b3 = ror64(tmp, 40);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 52) | (tmp << (64 - 52));
+ b3 = ror64(tmp, 52);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 57) | (tmp << (64 - 57));
+ b1 = ror64(tmp, 57);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 14) | (tmp << (64 - 14));
+ b1 = ror64(tmp, 14);
b0 -= b1 + k1;
b1 -= k2 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b2 -= b3 + k3 + t1;
b3 -= k4 + 6;


tmp = b3 ^ b0;
- b3 = (tmp >> 32) | (tmp << (64 - 32));
+ b3 = ror64(tmp, 32);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 32) | (tmp << (64 - 32));
+ b1 = ror64(tmp, 32);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 58) | (tmp << (64 - 58));
+ b1 = ror64(tmp, 58);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 46) | (tmp << (64 - 46));
+ b3 = ror64(tmp, 46);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 12) | (tmp << (64 - 12));
+ b1 = ror64(tmp, 12);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b0 -= b1 + k0;
b1 -= k1 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 33) | (tmp << (64 - 33));
+ b3 = ror64(tmp, 33);
b2 -= b3 + k2 + t0;
b3 -= k3 + 5;

tmp = b3 ^ b0;
- b3 = (tmp >> 5) | (tmp << (64 - 5));
+ b3 = ror64(tmp, 5);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 37) | (tmp << (64 - 37));
+ b1 = ror64(tmp, 37);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 40) | (tmp << (64 - 40));
+ b3 = ror64(tmp, 40);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 52) | (tmp << (64 - 52));
+ b3 = ror64(tmp, 52);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 57) | (tmp << (64 - 57));
+ b1 = ror64(tmp, 57);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 14) | (tmp << (64 - 14));
+ b1 = ror64(tmp, 14);
b0 -= b1 + k4;
b1 -= k0 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b2 -= b3 + k1 + t2;
b3 -= k2 + 4;


tmp = b3 ^ b0;
- b3 = (tmp >> 32) | (tmp << (64 - 32));
+ b3 = ror64(tmp, 32);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 32) | (tmp << (64 - 32));
+ b1 = ror64(tmp, 32);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 58) | (tmp << (64 - 58));
+ b1 = ror64(tmp, 58);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 46) | (tmp << (64 - 46));
+ b3 = ror64(tmp, 46);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 12) | (tmp << (64 - 12));
+ b1 = ror64(tmp, 12);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b0 -= b1 + k3;
b1 -= k4 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 33) | (tmp << (64 - 33));
+ b3 = ror64(tmp, 33);
b2 -= b3 + k0 + t1;
b3 -= k1 + 3;

tmp = b3 ^ b0;
- b3 = (tmp >> 5) | (tmp << (64 - 5));
+ b3 = ror64(tmp, 5);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 37) | (tmp << (64 - 37));
+ b1 = ror64(tmp, 37);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 40) | (tmp << (64 - 40));
+ b3 = ror64(tmp, 40);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 52) | (tmp << (64 - 52));
+ b3 = ror64(tmp, 52);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 57) | (tmp << (64 - 57));
+ b1 = ror64(tmp, 57);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 14) | (tmp << (64 - 14));
+ b1 = ror64(tmp, 14);
b0 -= b1 + k2;
b1 -= k3 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b2 -= b3 + k4 + t0;
b3 -= k0 + 2;


tmp = b3 ^ b0;
- b3 = (tmp >> 32) | (tmp << (64 - 32));
+ b3 = ror64(tmp, 32);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 32) | (tmp << (64 - 32));
+ b1 = ror64(tmp, 32);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 58) | (tmp << (64 - 58));
+ b1 = ror64(tmp, 58);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 46) | (tmp << (64 - 46));
+ b3 = ror64(tmp, 46);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 12) | (tmp << (64 - 12));
+ b1 = ror64(tmp, 12);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b0 -= b1 + k1;
b1 -= k2 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 33) | (tmp << (64 - 33));
+ b3 = ror64(tmp, 33);
b2 -= b3 + k3 + t2;
b3 -= k4 + 1;

tmp = b3 ^ b0;
- b3 = (tmp >> 5) | (tmp << (64 - 5));
+ b3 = ror64(tmp, 5);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 37) | (tmp << (64 - 37));
+ b1 = ror64(tmp, 37);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b0 -= b1;

tmp = b3 ^ b2;
- b3 = (tmp >> 40) | (tmp << (64 - 40));
+ b3 = ror64(tmp, 40);
b2 -= b3;

tmp = b3 ^ b0;
- b3 = (tmp >> 52) | (tmp << (64 - 52));
+ b3 = ror64(tmp, 52);
b0 -= b3;

tmp = b1 ^ b2;
- b1 = (tmp >> 57) | (tmp << (64 - 57));
+ b1 = ror64(tmp, 57);
b2 -= b1;

tmp = b1 ^ b0;
- b1 = (tmp >> 14) | (tmp << (64 - 14));
+ b1 = ror64(tmp, 14);
b0 -= b1 + k0;
b1 -= k1 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b2 -= b3 + k2 + t1;
b3 -= k3;

@@ -2125,1226 +2125,1226 @@ void threefish_decrypt_512(struct threefish_key *key_ctx, u64 *input,
b7 -= k7 + 18;

tmp = b3 ^ b4;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 56) | (tmp << (64 - 56));
+ b5 = ror64(tmp, 56);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 35) | (tmp << (64 - 35));
+ b7 = ror64(tmp, 35);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 8) | (tmp << (64 - 8));
+ b1 = ror64(tmp, 8);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 43) | (tmp << (64 - 43));
+ b7 = ror64(tmp, 43);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 39) | (tmp << (64 - 39));
+ b5 = ror64(tmp, 39);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 29) | (tmp << (64 - 29));
+ b3 = ror64(tmp, 29);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 17) | (tmp << (64 - 17));
+ b3 = ror64(tmp, 17);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 10) | (tmp << (64 - 10));
+ b5 = ror64(tmp, 10);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 50) | (tmp << (64 - 50));
+ b7 = ror64(tmp, 50);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 24) | (tmp << (64 - 24));
+ b7 = ror64(tmp, 24);
b6 -= b7 + k5 + t0;
b7 -= k6 + 17;

tmp = b5 ^ b4;
- b5 = (tmp >> 34) | (tmp << (64 - 34));
+ b5 = ror64(tmp, 34);
b4 -= b5 + k3;
b5 -= k4 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 30) | (tmp << (64 - 30));
+ b3 = ror64(tmp, 30);
b2 -= b3 + k1;
b3 -= k2;

tmp = b1 ^ b0;
- b1 = (tmp >> 39) | (tmp << (64 - 39));
+ b1 = ror64(tmp, 39);
b0 -= b1 + k8;
b1 -= k0;

tmp = b3 ^ b4;
- b3 = (tmp >> 56) | (tmp << (64 - 56));
+ b3 = ror64(tmp, 56);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 54) | (tmp << (64 - 54));
+ b5 = ror64(tmp, 54);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 9) | (tmp << (64 - 9));
+ b7 = ror64(tmp, 9);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 44) | (tmp << (64 - 44));
+ b1 = ror64(tmp, 44);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 39) | (tmp << (64 - 39));
+ b7 = ror64(tmp, 39);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 36) | (tmp << (64 - 36));
+ b5 = ror64(tmp, 36);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 49) | (tmp << (64 - 49));
+ b3 = ror64(tmp, 49);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 17) | (tmp << (64 - 17));
+ b1 = ror64(tmp, 17);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 42) | (tmp << (64 - 42));
+ b3 = ror64(tmp, 42);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 14) | (tmp << (64 - 14));
+ b5 = ror64(tmp, 14);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 27) | (tmp << (64 - 27));
+ b7 = ror64(tmp, 27);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 33) | (tmp << (64 - 33));
+ b1 = ror64(tmp, 33);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 37) | (tmp << (64 - 37));
+ b7 = ror64(tmp, 37);
b6 -= b7 + k4 + t2;
b7 -= k5 + 16;

tmp = b5 ^ b4;
- b5 = (tmp >> 19) | (tmp << (64 - 19));
+ b5 = ror64(tmp, 19);
b4 -= b5 + k2;
b5 -= k3 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 36) | (tmp << (64 - 36));
+ b3 = ror64(tmp, 36);
b2 -= b3 + k0;
b3 -= k1;

tmp = b1 ^ b0;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b0 -= b1 + k7;
b1 -= k8;

tmp = b3 ^ b4;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 56) | (tmp << (64 - 56));
+ b5 = ror64(tmp, 56);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 35) | (tmp << (64 - 35));
+ b7 = ror64(tmp, 35);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 8) | (tmp << (64 - 8));
+ b1 = ror64(tmp, 8);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 43) | (tmp << (64 - 43));
+ b7 = ror64(tmp, 43);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 39) | (tmp << (64 - 39));
+ b5 = ror64(tmp, 39);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 29) | (tmp << (64 - 29));
+ b3 = ror64(tmp, 29);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 17) | (tmp << (64 - 17));
+ b3 = ror64(tmp, 17);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 10) | (tmp << (64 - 10));
+ b5 = ror64(tmp, 10);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 50) | (tmp << (64 - 50));
+ b7 = ror64(tmp, 50);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 24) | (tmp << (64 - 24));
+ b7 = ror64(tmp, 24);
b6 -= b7 + k3 + t1;
b7 -= k4 + 15;

tmp = b5 ^ b4;
- b5 = (tmp >> 34) | (tmp << (64 - 34));
+ b5 = ror64(tmp, 34);
b4 -= b5 + k1;
b5 -= k2 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 30) | (tmp << (64 - 30));
+ b3 = ror64(tmp, 30);
b2 -= b3 + k8;
b3 -= k0;

tmp = b1 ^ b0;
- b1 = (tmp >> 39) | (tmp << (64 - 39));
+ b1 = ror64(tmp, 39);
b0 -= b1 + k6;
b1 -= k7;

tmp = b3 ^ b4;
- b3 = (tmp >> 56) | (tmp << (64 - 56));
+ b3 = ror64(tmp, 56);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 54) | (tmp << (64 - 54));
+ b5 = ror64(tmp, 54);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 9) | (tmp << (64 - 9));
+ b7 = ror64(tmp, 9);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 44) | (tmp << (64 - 44));
+ b1 = ror64(tmp, 44);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 39) | (tmp << (64 - 39));
+ b7 = ror64(tmp, 39);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 36) | (tmp << (64 - 36));
+ b5 = ror64(tmp, 36);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 49) | (tmp << (64 - 49));
+ b3 = ror64(tmp, 49);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 17) | (tmp << (64 - 17));
+ b1 = ror64(tmp, 17);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 42) | (tmp << (64 - 42));
+ b3 = ror64(tmp, 42);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 14) | (tmp << (64 - 14));
+ b5 = ror64(tmp, 14);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 27) | (tmp << (64 - 27));
+ b7 = ror64(tmp, 27);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 33) | (tmp << (64 - 33));
+ b1 = ror64(tmp, 33);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 37) | (tmp << (64 - 37));
+ b7 = ror64(tmp, 37);
b6 -= b7 + k2 + t0;
b7 -= k3 + 14;

tmp = b5 ^ b4;
- b5 = (tmp >> 19) | (tmp << (64 - 19));
+ b5 = ror64(tmp, 19);
b4 -= b5 + k0;
b5 -= k1 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 36) | (tmp << (64 - 36));
+ b3 = ror64(tmp, 36);
b2 -= b3 + k7;
b3 -= k8;

tmp = b1 ^ b0;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b0 -= b1 + k5;
b1 -= k6;

tmp = b3 ^ b4;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 56) | (tmp << (64 - 56));
+ b5 = ror64(tmp, 56);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 35) | (tmp << (64 - 35));
+ b7 = ror64(tmp, 35);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 8) | (tmp << (64 - 8));
+ b1 = ror64(tmp, 8);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 43) | (tmp << (64 - 43));
+ b7 = ror64(tmp, 43);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 39) | (tmp << (64 - 39));
+ b5 = ror64(tmp, 39);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 29) | (tmp << (64 - 29));
+ b3 = ror64(tmp, 29);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 17) | (tmp << (64 - 17));
+ b3 = ror64(tmp, 17);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 10) | (tmp << (64 - 10));
+ b5 = ror64(tmp, 10);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 50) | (tmp << (64 - 50));
+ b7 = ror64(tmp, 50);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 24) | (tmp << (64 - 24));
+ b7 = ror64(tmp, 24);
b6 -= b7 + k1 + t2;
b7 -= k2 + 13;

tmp = b5 ^ b4;
- b5 = (tmp >> 34) | (tmp << (64 - 34));
+ b5 = ror64(tmp, 34);
b4 -= b5 + k8;
b5 -= k0 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 30) | (tmp << (64 - 30));
+ b3 = ror64(tmp, 30);
b2 -= b3 + k6;
b3 -= k7;

tmp = b1 ^ b0;
- b1 = (tmp >> 39) | (tmp << (64 - 39));
+ b1 = ror64(tmp, 39);
b0 -= b1 + k4;
b1 -= k5;

tmp = b3 ^ b4;
- b3 = (tmp >> 56) | (tmp << (64 - 56));
+ b3 = ror64(tmp, 56);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 54) | (tmp << (64 - 54));
+ b5 = ror64(tmp, 54);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 9) | (tmp << (64 - 9));
+ b7 = ror64(tmp, 9);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 44) | (tmp << (64 - 44));
+ b1 = ror64(tmp, 44);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 39) | (tmp << (64 - 39));
+ b7 = ror64(tmp, 39);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 36) | (tmp << (64 - 36));
+ b5 = ror64(tmp, 36);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 49) | (tmp << (64 - 49));
+ b3 = ror64(tmp, 49);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 17) | (tmp << (64 - 17));
+ b1 = ror64(tmp, 17);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 42) | (tmp << (64 - 42));
+ b3 = ror64(tmp, 42);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 14) | (tmp << (64 - 14));
+ b5 = ror64(tmp, 14);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 27) | (tmp << (64 - 27));
+ b7 = ror64(tmp, 27);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 33) | (tmp << (64 - 33));
+ b1 = ror64(tmp, 33);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 37) | (tmp << (64 - 37));
+ b7 = ror64(tmp, 37);
b6 -= b7 + k0 + t1;
b7 -= k1 + 12;

tmp = b5 ^ b4;
- b5 = (tmp >> 19) | (tmp << (64 - 19));
+ b5 = ror64(tmp, 19);
b4 -= b5 + k7;
b5 -= k8 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 36) | (tmp << (64 - 36));
+ b3 = ror64(tmp, 36);
b2 -= b3 + k5;
b3 -= k6;

tmp = b1 ^ b0;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b0 -= b1 + k3;
b1 -= k4;

tmp = b3 ^ b4;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 56) | (tmp << (64 - 56));
+ b5 = ror64(tmp, 56);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 35) | (tmp << (64 - 35));
+ b7 = ror64(tmp, 35);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 8) | (tmp << (64 - 8));
+ b1 = ror64(tmp, 8);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 43) | (tmp << (64 - 43));
+ b7 = ror64(tmp, 43);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 39) | (tmp << (64 - 39));
+ b5 = ror64(tmp, 39);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 29) | (tmp << (64 - 29));
+ b3 = ror64(tmp, 29);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 17) | (tmp << (64 - 17));
+ b3 = ror64(tmp, 17);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 10) | (tmp << (64 - 10));
+ b5 = ror64(tmp, 10);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 50) | (tmp << (64 - 50));
+ b7 = ror64(tmp, 50);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 24) | (tmp << (64 - 24));
+ b7 = ror64(tmp, 24);
b6 -= b7 + k8 + t0;
b7 -= k0 + 11;

tmp = b5 ^ b4;
- b5 = (tmp >> 34) | (tmp << (64 - 34));
+ b5 = ror64(tmp, 34);
b4 -= b5 + k6;
b5 -= k7 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 30) | (tmp << (64 - 30));
+ b3 = ror64(tmp, 30);
b2 -= b3 + k4;
b3 -= k5;

tmp = b1 ^ b0;
- b1 = (tmp >> 39) | (tmp << (64 - 39));
+ b1 = ror64(tmp, 39);
b0 -= b1 + k2;
b1 -= k3;

tmp = b3 ^ b4;
- b3 = (tmp >> 56) | (tmp << (64 - 56));
+ b3 = ror64(tmp, 56);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 54) | (tmp << (64 - 54));
+ b5 = ror64(tmp, 54);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 9) | (tmp << (64 - 9));
+ b7 = ror64(tmp, 9);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 44) | (tmp << (64 - 44));
+ b1 = ror64(tmp, 44);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 39) | (tmp << (64 - 39));
+ b7 = ror64(tmp, 39);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 36) | (tmp << (64 - 36));
+ b5 = ror64(tmp, 36);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 49) | (tmp << (64 - 49));
+ b3 = ror64(tmp, 49);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 17) | (tmp << (64 - 17));
+ b1 = ror64(tmp, 17);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 42) | (tmp << (64 - 42));
+ b3 = ror64(tmp, 42);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 14) | (tmp << (64 - 14));
+ b5 = ror64(tmp, 14);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 27) | (tmp << (64 - 27));
+ b7 = ror64(tmp, 27);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 33) | (tmp << (64 - 33));
+ b1 = ror64(tmp, 33);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 37) | (tmp << (64 - 37));
+ b7 = ror64(tmp, 37);
b6 -= b7 + k7 + t2;
b7 -= k8 + 10;

tmp = b5 ^ b4;
- b5 = (tmp >> 19) | (tmp << (64 - 19));
+ b5 = ror64(tmp, 19);
b4 -= b5 + k5;
b5 -= k6 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 36) | (tmp << (64 - 36));
+ b3 = ror64(tmp, 36);
b2 -= b3 + k3;
b3 -= k4;

tmp = b1 ^ b0;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b0 -= b1 + k1;
b1 -= k2;

tmp = b3 ^ b4;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 56) | (tmp << (64 - 56));
+ b5 = ror64(tmp, 56);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 35) | (tmp << (64 - 35));
+ b7 = ror64(tmp, 35);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 8) | (tmp << (64 - 8));
+ b1 = ror64(tmp, 8);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 43) | (tmp << (64 - 43));
+ b7 = ror64(tmp, 43);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 39) | (tmp << (64 - 39));
+ b5 = ror64(tmp, 39);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 29) | (tmp << (64 - 29));
+ b3 = ror64(tmp, 29);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 17) | (tmp << (64 - 17));
+ b3 = ror64(tmp, 17);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 10) | (tmp << (64 - 10));
+ b5 = ror64(tmp, 10);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 50) | (tmp << (64 - 50));
+ b7 = ror64(tmp, 50);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 24) | (tmp << (64 - 24));
+ b7 = ror64(tmp, 24);
b6 -= b7 + k6 + t1;
b7 -= k7 + 9;

tmp = b5 ^ b4;
- b5 = (tmp >> 34) | (tmp << (64 - 34));
+ b5 = ror64(tmp, 34);
b4 -= b5 + k4;
b5 -= k5 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 30) | (tmp << (64 - 30));
+ b3 = ror64(tmp, 30);
b2 -= b3 + k2;
b3 -= k3;

tmp = b1 ^ b0;
- b1 = (tmp >> 39) | (tmp << (64 - 39));
+ b1 = ror64(tmp, 39);
b0 -= b1 + k0;
b1 -= k1;

tmp = b3 ^ b4;
- b3 = (tmp >> 56) | (tmp << (64 - 56));
+ b3 = ror64(tmp, 56);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 54) | (tmp << (64 - 54));
+ b5 = ror64(tmp, 54);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 9) | (tmp << (64 - 9));
+ b7 = ror64(tmp, 9);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 44) | (tmp << (64 - 44));
+ b1 = ror64(tmp, 44);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 39) | (tmp << (64 - 39));
+ b7 = ror64(tmp, 39);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 36) | (tmp << (64 - 36));
+ b5 = ror64(tmp, 36);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 49) | (tmp << (64 - 49));
+ b3 = ror64(tmp, 49);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 17) | (tmp << (64 - 17));
+ b1 = ror64(tmp, 17);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 42) | (tmp << (64 - 42));
+ b3 = ror64(tmp, 42);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 14) | (tmp << (64 - 14));
+ b5 = ror64(tmp, 14);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 27) | (tmp << (64 - 27));
+ b7 = ror64(tmp, 27);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 33) | (tmp << (64 - 33));
+ b1 = ror64(tmp, 33);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 37) | (tmp << (64 - 37));
+ b7 = ror64(tmp, 37);
b6 -= b7 + k5 + t0;
b7 -= k6 + 8;

tmp = b5 ^ b4;
- b5 = (tmp >> 19) | (tmp << (64 - 19));
+ b5 = ror64(tmp, 19);
b4 -= b5 + k3;
b5 -= k4 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 36) | (tmp << (64 - 36));
+ b3 = ror64(tmp, 36);
b2 -= b3 + k1;
b3 -= k2;

tmp = b1 ^ b0;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b0 -= b1 + k8;
b1 -= k0;

tmp = b3 ^ b4;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 56) | (tmp << (64 - 56));
+ b5 = ror64(tmp, 56);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 35) | (tmp << (64 - 35));
+ b7 = ror64(tmp, 35);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 8) | (tmp << (64 - 8));
+ b1 = ror64(tmp, 8);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 43) | (tmp << (64 - 43));
+ b7 = ror64(tmp, 43);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 39) | (tmp << (64 - 39));
+ b5 = ror64(tmp, 39);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 29) | (tmp << (64 - 29));
+ b3 = ror64(tmp, 29);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 17) | (tmp << (64 - 17));
+ b3 = ror64(tmp, 17);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 10) | (tmp << (64 - 10));
+ b5 = ror64(tmp, 10);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 50) | (tmp << (64 - 50));
+ b7 = ror64(tmp, 50);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 24) | (tmp << (64 - 24));
+ b7 = ror64(tmp, 24);
b6 -= b7 + k4 + t2;
b7 -= k5 + 7;

tmp = b5 ^ b4;
- b5 = (tmp >> 34) | (tmp << (64 - 34));
+ b5 = ror64(tmp, 34);
b4 -= b5 + k2;
b5 -= k3 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 30) | (tmp << (64 - 30));
+ b3 = ror64(tmp, 30);
b2 -= b3 + k0;
b3 -= k1;

tmp = b1 ^ b0;
- b1 = (tmp >> 39) | (tmp << (64 - 39));
+ b1 = ror64(tmp, 39);
b0 -= b1 + k7;
b1 -= k8;

tmp = b3 ^ b4;
- b3 = (tmp >> 56) | (tmp << (64 - 56));
+ b3 = ror64(tmp, 56);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 54) | (tmp << (64 - 54));
+ b5 = ror64(tmp, 54);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 9) | (tmp << (64 - 9));
+ b7 = ror64(tmp, 9);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 44) | (tmp << (64 - 44));
+ b1 = ror64(tmp, 44);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 39) | (tmp << (64 - 39));
+ b7 = ror64(tmp, 39);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 36) | (tmp << (64 - 36));
+ b5 = ror64(tmp, 36);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 49) | (tmp << (64 - 49));
+ b3 = ror64(tmp, 49);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 17) | (tmp << (64 - 17));
+ b1 = ror64(tmp, 17);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 42) | (tmp << (64 - 42));
+ b3 = ror64(tmp, 42);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 14) | (tmp << (64 - 14));
+ b5 = ror64(tmp, 14);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 27) | (tmp << (64 - 27));
+ b7 = ror64(tmp, 27);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 33) | (tmp << (64 - 33));
+ b1 = ror64(tmp, 33);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 37) | (tmp << (64 - 37));
+ b7 = ror64(tmp, 37);
b6 -= b7 + k3 + t1;
b7 -= k4 + 6;

tmp = b5 ^ b4;
- b5 = (tmp >> 19) | (tmp << (64 - 19));
+ b5 = ror64(tmp, 19);
b4 -= b5 + k1;
b5 -= k2 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 36) | (tmp << (64 - 36));
+ b3 = ror64(tmp, 36);
b2 -= b3 + k8;
b3 -= k0;

tmp = b1 ^ b0;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b0 -= b1 + k6;
b1 -= k7;

tmp = b3 ^ b4;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 56) | (tmp << (64 - 56));
+ b5 = ror64(tmp, 56);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 35) | (tmp << (64 - 35));
+ b7 = ror64(tmp, 35);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 8) | (tmp << (64 - 8));
+ b1 = ror64(tmp, 8);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 43) | (tmp << (64 - 43));
+ b7 = ror64(tmp, 43);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 39) | (tmp << (64 - 39));
+ b5 = ror64(tmp, 39);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 29) | (tmp << (64 - 29));
+ b3 = ror64(tmp, 29);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 17) | (tmp << (64 - 17));
+ b3 = ror64(tmp, 17);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 10) | (tmp << (64 - 10));
+ b5 = ror64(tmp, 10);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 50) | (tmp << (64 - 50));
+ b7 = ror64(tmp, 50);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 24) | (tmp << (64 - 24));
+ b7 = ror64(tmp, 24);
b6 -= b7 + k2 + t0;
b7 -= k3 + 5;

tmp = b5 ^ b4;
- b5 = (tmp >> 34) | (tmp << (64 - 34));
+ b5 = ror64(tmp, 34);
b4 -= b5 + k0;
b5 -= k1 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 30) | (tmp << (64 - 30));
+ b3 = ror64(tmp, 30);
b2 -= b3 + k7;
b3 -= k8;

tmp = b1 ^ b0;
- b1 = (tmp >> 39) | (tmp << (64 - 39));
+ b1 = ror64(tmp, 39);
b0 -= b1 + k5;
b1 -= k6;

tmp = b3 ^ b4;
- b3 = (tmp >> 56) | (tmp << (64 - 56));
+ b3 = ror64(tmp, 56);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 54) | (tmp << (64 - 54));
+ b5 = ror64(tmp, 54);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 9) | (tmp << (64 - 9));
+ b7 = ror64(tmp, 9);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 44) | (tmp << (64 - 44));
+ b1 = ror64(tmp, 44);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 39) | (tmp << (64 - 39));
+ b7 = ror64(tmp, 39);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 36) | (tmp << (64 - 36));
+ b5 = ror64(tmp, 36);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 49) | (tmp << (64 - 49));
+ b3 = ror64(tmp, 49);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 17) | (tmp << (64 - 17));
+ b1 = ror64(tmp, 17);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 42) | (tmp << (64 - 42));
+ b3 = ror64(tmp, 42);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 14) | (tmp << (64 - 14));
+ b5 = ror64(tmp, 14);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 27) | (tmp << (64 - 27));
+ b7 = ror64(tmp, 27);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 33) | (tmp << (64 - 33));
+ b1 = ror64(tmp, 33);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 37) | (tmp << (64 - 37));
+ b7 = ror64(tmp, 37);
b6 -= b7 + k1 + t2;
b7 -= k2 + 4;

tmp = b5 ^ b4;
- b5 = (tmp >> 19) | (tmp << (64 - 19));
+ b5 = ror64(tmp, 19);
b4 -= b5 + k8;
b5 -= k0 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 36) | (tmp << (64 - 36));
+ b3 = ror64(tmp, 36);
b2 -= b3 + k6;
b3 -= k7;

tmp = b1 ^ b0;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b0 -= b1 + k4;
b1 -= k5;

tmp = b3 ^ b4;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 56) | (tmp << (64 - 56));
+ b5 = ror64(tmp, 56);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 35) | (tmp << (64 - 35));
+ b7 = ror64(tmp, 35);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 8) | (tmp << (64 - 8));
+ b1 = ror64(tmp, 8);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 43) | (tmp << (64 - 43));
+ b7 = ror64(tmp, 43);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 39) | (tmp << (64 - 39));
+ b5 = ror64(tmp, 39);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 29) | (tmp << (64 - 29));
+ b3 = ror64(tmp, 29);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 17) | (tmp << (64 - 17));
+ b3 = ror64(tmp, 17);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 10) | (tmp << (64 - 10));
+ b5 = ror64(tmp, 10);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 50) | (tmp << (64 - 50));
+ b7 = ror64(tmp, 50);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 24) | (tmp << (64 - 24));
+ b7 = ror64(tmp, 24);
b6 -= b7 + k0 + t1;
b7 -= k1 + 3;

tmp = b5 ^ b4;
- b5 = (tmp >> 34) | (tmp << (64 - 34));
+ b5 = ror64(tmp, 34);
b4 -= b5 + k7;
b5 -= k8 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 30) | (tmp << (64 - 30));
+ b3 = ror64(tmp, 30);
b2 -= b3 + k5;
b3 -= k6;

tmp = b1 ^ b0;
- b1 = (tmp >> 39) | (tmp << (64 - 39));
+ b1 = ror64(tmp, 39);
b0 -= b1 + k3;
b1 -= k4;

tmp = b3 ^ b4;
- b3 = (tmp >> 56) | (tmp << (64 - 56));
+ b3 = ror64(tmp, 56);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 54) | (tmp << (64 - 54));
+ b5 = ror64(tmp, 54);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 9) | (tmp << (64 - 9));
+ b7 = ror64(tmp, 9);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 44) | (tmp << (64 - 44));
+ b1 = ror64(tmp, 44);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 39) | (tmp << (64 - 39));
+ b7 = ror64(tmp, 39);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 36) | (tmp << (64 - 36));
+ b5 = ror64(tmp, 36);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 49) | (tmp << (64 - 49));
+ b3 = ror64(tmp, 49);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 17) | (tmp << (64 - 17));
+ b1 = ror64(tmp, 17);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 42) | (tmp << (64 - 42));
+ b3 = ror64(tmp, 42);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 14) | (tmp << (64 - 14));
+ b5 = ror64(tmp, 14);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 27) | (tmp << (64 - 27));
+ b7 = ror64(tmp, 27);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 33) | (tmp << (64 - 33));
+ b1 = ror64(tmp, 33);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 37) | (tmp << (64 - 37));
+ b7 = ror64(tmp, 37);
b6 -= b7 + k8 + t0;
b7 -= k0 + 2;

tmp = b5 ^ b4;
- b5 = (tmp >> 19) | (tmp << (64 - 19));
+ b5 = ror64(tmp, 19);
b4 -= b5 + k6;
b5 -= k7 + t2;

tmp = b3 ^ b2;
- b3 = (tmp >> 36) | (tmp << (64 - 36));
+ b3 = ror64(tmp, 36);
b2 -= b3 + k4;
b3 -= k5;

tmp = b1 ^ b0;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b0 -= b1 + k2;
b1 -= k3;

tmp = b3 ^ b4;
- b3 = (tmp >> 22) | (tmp << (64 - 22));
+ b3 = ror64(tmp, 22);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 56) | (tmp << (64 - 56));
+ b5 = ror64(tmp, 56);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 35) | (tmp << (64 - 35));
+ b7 = ror64(tmp, 35);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 8) | (tmp << (64 - 8));
+ b1 = ror64(tmp, 8);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 43) | (tmp << (64 - 43));
+ b7 = ror64(tmp, 43);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 39) | (tmp << (64 - 39));
+ b5 = ror64(tmp, 39);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 29) | (tmp << (64 - 29));
+ b3 = ror64(tmp, 29);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 25) | (tmp << (64 - 25));
+ b1 = ror64(tmp, 25);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 17) | (tmp << (64 - 17));
+ b3 = ror64(tmp, 17);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 10) | (tmp << (64 - 10));
+ b5 = ror64(tmp, 10);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 50) | (tmp << (64 - 50));
+ b7 = ror64(tmp, 50);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 24) | (tmp << (64 - 24));
+ b7 = ror64(tmp, 24);
b6 -= b7 + k7 + t2;
b7 -= k8 + 1;

tmp = b5 ^ b4;
- b5 = (tmp >> 34) | (tmp << (64 - 34));
+ b5 = ror64(tmp, 34);
b4 -= b5 + k5;
b5 -= k6 + t1;

tmp = b3 ^ b2;
- b3 = (tmp >> 30) | (tmp << (64 - 30));
+ b3 = ror64(tmp, 30);
b2 -= b3 + k3;
b3 -= k4;

tmp = b1 ^ b0;
- b1 = (tmp >> 39) | (tmp << (64 - 39));
+ b1 = ror64(tmp, 39);
b0 -= b1 + k1;
b1 -= k2;

tmp = b3 ^ b4;
- b3 = (tmp >> 56) | (tmp << (64 - 56));
+ b3 = ror64(tmp, 56);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 54) | (tmp << (64 - 54));
+ b5 = ror64(tmp, 54);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 9) | (tmp << (64 - 9));
+ b7 = ror64(tmp, 9);
b0 -= b7;

tmp = b1 ^ b6;
- b1 = (tmp >> 44) | (tmp << (64 - 44));
+ b1 = ror64(tmp, 44);
b6 -= b1;

tmp = b7 ^ b2;
- b7 = (tmp >> 39) | (tmp << (64 - 39));
+ b7 = ror64(tmp, 39);
b2 -= b7;

tmp = b5 ^ b0;
- b5 = (tmp >> 36) | (tmp << (64 - 36));
+ b5 = ror64(tmp, 36);
b0 -= b5;

tmp = b3 ^ b6;
- b3 = (tmp >> 49) | (tmp << (64 - 49));
+ b3 = ror64(tmp, 49);
b6 -= b3;

tmp = b1 ^ b4;
- b1 = (tmp >> 17) | (tmp << (64 - 17));
+ b1 = ror64(tmp, 17);
b4 -= b1;

tmp = b3 ^ b0;
- b3 = (tmp >> 42) | (tmp << (64 - 42));
+ b3 = ror64(tmp, 42);
b0 -= b3;

tmp = b5 ^ b6;
- b5 = (tmp >> 14) | (tmp << (64 - 14));
+ b5 = ror64(tmp, 14);
b6 -= b5;

tmp = b7 ^ b4;
- b7 = (tmp >> 27) | (tmp << (64 - 27));
+ b7 = ror64(tmp, 27);
b4 -= b7;

tmp = b1 ^ b2;
- b1 = (tmp >> 33) | (tmp << (64 - 33));
+ b1 = ror64(tmp, 33);
b2 -= b1;

tmp = b7 ^ b6;
- b7 = (tmp >> 37) | (tmp << (64 - 37));
+ b7 = ror64(tmp, 37);
b6 -= b7 + k6 + t1;
b7 -= k7;

tmp = b5 ^ b4;
- b5 = (tmp >> 19) | (tmp << (64 - 19));
+ b5 = ror64(tmp, 19);
b4 -= b5 + k4;
b5 -= k5 + t0;

tmp = b3 ^ b2;
- b3 = (tmp >> 36) | (tmp << (64 - 36));
+ b3 = ror64(tmp, 36);
b2 -= b3 + k2;
b3 -= k3;

tmp = b1 ^ b0;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b0 -= b1 + k0;
b1 -= k1;

@@ -5521,2722 +5521,2722 @@ void threefish_decrypt_1024(struct threefish_key *key_ctx, u64 *input,
b14 -= k0 + t0;
b15 -= k1 + 20;
tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k16 + t2;
b15 -= k0 + 19;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k14;
b13 -= k15 + t1;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k12;
b11 -= k13;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k10;
b9 -= k11;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k8;
b7 -= k9;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k6;
b5 -= k7;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k4;
b3 -= k5;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k2;
b1 -= k3;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k15 + t1;
b15 -= k16 + 18;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k13;
b13 -= k14 + t0;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k11;
b11 -= k12;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k9;
b9 -= k10;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k7;
b7 -= k8;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k5;
b5 -= k6;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k3;
b3 -= k4;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k1;
b1 -= k2;

tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k14 + t0;
b15 -= k15 + 17;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k12;
b13 -= k13 + t2;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k10;
b11 -= k11;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k8;
b9 -= k9;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k6;
b7 -= k7;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k4;
b5 -= k5;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k2;
b3 -= k3;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k0;
b1 -= k1;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k13 + t2;
b15 -= k14 + 16;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k11;
b13 -= k12 + t1;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k9;
b11 -= k10;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k7;
b9 -= k8;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k5;
b7 -= k6;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k3;
b5 -= k4;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k1;
b3 -= k2;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k16;
b1 -= k0;

tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k12 + t1;
b15 -= k13 + 15;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k10;
b13 -= k11 + t0;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k8;
b11 -= k9;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k6;
b9 -= k7;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k4;
b7 -= k5;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k2;
b5 -= k3;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k0;
b3 -= k1;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k15;
b1 -= k16;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k11 + t0;
b15 -= k12 + 14;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k9;
b13 -= k10 + t2;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k7;
b11 -= k8;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k5;
b9 -= k6;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k3;
b7 -= k4;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k1;
b5 -= k2;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k16;
b3 -= k0;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k14;
b1 -= k15;

tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k10 + t2;
b15 -= k11 + 13;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k8;
b13 -= k9 + t1;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k6;
b11 -= k7;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k4;
b9 -= k5;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k2;
b7 -= k3;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k0;
b5 -= k1;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k15;
b3 -= k16;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k13;
b1 -= k14;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k9 + t1;
b15 -= k10 + 12;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k7;
b13 -= k8 + t0;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k5;
b11 -= k6;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k3;
b9 -= k4;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k1;
b7 -= k2;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k16;
b5 -= k0;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k14;
b3 -= k15;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k12;
b1 -= k13;

tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k8 + t0;
b15 -= k9 + 11;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k6;
b13 -= k7 + t2;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k4;
b11 -= k5;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k2;
b9 -= k3;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k0;
b7 -= k1;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k15;
b5 -= k16;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k13;
b3 -= k14;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k11;
b1 -= k12;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k7 + t2;
b15 -= k8 + 10;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k5;
b13 -= k6 + t1;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k3;
b11 -= k4;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k1;
b9 -= k2;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k16;
b7 -= k0;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k14;
b5 -= k15;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k12;
b3 -= k13;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k10;
b1 -= k11;

tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k6 + t1;
b15 -= k7 + 9;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k4;
b13 -= k5 + t0;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k2;
b11 -= k3;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k0;
b9 -= k1;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k15;
b7 -= k16;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k13;
b5 -= k14;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k11;
b3 -= k12;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k9;
b1 -= k10;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k5 + t0;
b15 -= k6 + 8;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k3;
b13 -= k4 + t2;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k1;
b11 -= k2;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k16;
b9 -= k0;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k14;
b7 -= k15;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k12;
b5 -= k13;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k10;
b3 -= k11;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k8;
b1 -= k9;

tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k4 + t2;
b15 -= k5 + 7;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k2;
b13 -= k3 + t1;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k0;
b11 -= k1;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k15;
b9 -= k16;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k13;
b7 -= k14;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k11;
b5 -= k12;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k9;
b3 -= k10;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k7;
b1 -= k8;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k3 + t1;
b15 -= k4 + 6;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k1;
b13 -= k2 + t0;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k16;
b11 -= k0;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k14;
b9 -= k15;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k12;
b7 -= k13;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k10;
b5 -= k11;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k8;
b3 -= k9;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k6;
b1 -= k7;

tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k2 + t0;
b15 -= k3 + 5;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k0;
b13 -= k1 + t2;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k15;
b11 -= k16;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k13;
b9 -= k14;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k11;
b7 -= k12;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k9;
b5 -= k10;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k7;
b3 -= k8;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k5;
b1 -= k6;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k1 + t2;
b15 -= k2 + 4;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k16;
b13 -= k0 + t1;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k14;
b11 -= k15;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k12;
b9 -= k13;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k10;
b7 -= k11;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k8;
b5 -= k9;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k6;
b3 -= k7;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k4;
b1 -= k5;

tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k0 + t1;
b15 -= k1 + 3;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k15;
b13 -= k16 + t0;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k13;
b11 -= k14;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k11;
b9 -= k12;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k9;
b7 -= k10;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k7;
b5 -= k8;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k5;
b3 -= k6;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k3;
b1 -= k4;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k16 + t0;
b15 -= k0 + 2;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k14;
b13 -= k15 + t2;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k12;
b11 -= k13;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k10;
b9 -= k11;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k8;
b7 -= k9;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k6;
b5 -= k7;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k4;
b3 -= k5;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k2;
b1 -= k3;

tmp = b7 ^ b12;
- b7 = (tmp >> 20) | (tmp << (64 - 20));
+ b7 = ror64(tmp, 20);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 37) | (tmp << (64 - 37));
+ b3 = ror64(tmp, 37);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 31) | (tmp << (64 - 31));
+ b5 = ror64(tmp, 31);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 23) | (tmp << (64 - 23));
+ b1 = ror64(tmp, 23);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 52) | (tmp << (64 - 52));
+ b9 = ror64(tmp, 52);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 35) | (tmp << (64 - 35));
+ b13 = ror64(tmp, 35);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 48) | (tmp << (64 - 48));
+ b11 = ror64(tmp, 48);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 9) | (tmp << (64 - 9));
+ b15 = ror64(tmp, 9);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 25) | (tmp << (64 - 25));
+ b9 = ror64(tmp, 25);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 44) | (tmp << (64 - 44));
+ b11 = ror64(tmp, 44);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 42) | (tmp << (64 - 42));
+ b13 = ror64(tmp, 42);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 19) | (tmp << (64 - 19));
+ b15 = ror64(tmp, 19);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 46) | (tmp << (64 - 46));
+ b1 = ror64(tmp, 46);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 47) | (tmp << (64 - 47));
+ b3 = ror64(tmp, 47);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 44) | (tmp << (64 - 44));
+ b5 = ror64(tmp, 44);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 42) | (tmp << (64 - 42));
+ b5 = ror64(tmp, 42);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 53) | (tmp << (64 - 53));
+ b3 = ror64(tmp, 53);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 4) | (tmp << (64 - 4));
+ b7 = ror64(tmp, 4);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 51) | (tmp << (64 - 51));
+ b15 = ror64(tmp, 51);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 56) | (tmp << (64 - 56));
+ b11 = ror64(tmp, 56);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 34) | (tmp << (64 - 34));
+ b13 = ror64(tmp, 34);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 16) | (tmp << (64 - 16));
+ b9 = ror64(tmp, 16);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 30) | (tmp << (64 - 30));
+ b15 = ror64(tmp, 30);
b14 -= b15 + k15 + t2;
b15 -= k16 + 1;

tmp = b13 ^ b12;
- b13 = (tmp >> 44) | (tmp << (64 - 44));
+ b13 = ror64(tmp, 44);
b12 -= b13 + k13;
b13 -= k14 + t1;

tmp = b11 ^ b10;
- b11 = (tmp >> 47) | (tmp << (64 - 47));
+ b11 = ror64(tmp, 47);
b10 -= b11 + k11;
b11 -= k12;

tmp = b9 ^ b8;
- b9 = (tmp >> 12) | (tmp << (64 - 12));
+ b9 = ror64(tmp, 12);
b8 -= b9 + k9;
b9 -= k10;

tmp = b7 ^ b6;
- b7 = (tmp >> 31) | (tmp << (64 - 31));
+ b7 = ror64(tmp, 31);
b6 -= b7 + k7;
b7 -= k8;

tmp = b5 ^ b4;
- b5 = (tmp >> 37) | (tmp << (64 - 37));
+ b5 = ror64(tmp, 37);
b4 -= b5 + k5;
b5 -= k6;

tmp = b3 ^ b2;
- b3 = (tmp >> 9) | (tmp << (64 - 9));
+ b3 = ror64(tmp, 9);
b2 -= b3 + k3;
b3 -= k4;

tmp = b1 ^ b0;
- b1 = (tmp >> 41) | (tmp << (64 - 41));
+ b1 = ror64(tmp, 41);
b0 -= b1 + k1;
b1 -= k2;

tmp = b7 ^ b12;
- b7 = (tmp >> 25) | (tmp << (64 - 25));
+ b7 = ror64(tmp, 25);
b12 -= b7;

tmp = b3 ^ b10;
- b3 = (tmp >> 16) | (tmp << (64 - 16));
+ b3 = ror64(tmp, 16);
b10 -= b3;

tmp = b5 ^ b8;
- b5 = (tmp >> 28) | (tmp << (64 - 28));
+ b5 = ror64(tmp, 28);
b8 -= b5;

tmp = b1 ^ b14;
- b1 = (tmp >> 47) | (tmp << (64 - 47));
+ b1 = ror64(tmp, 47);
b14 -= b1;

tmp = b9 ^ b4;
- b9 = (tmp >> 41) | (tmp << (64 - 41));
+ b9 = ror64(tmp, 41);
b4 -= b9;

tmp = b13 ^ b6;
- b13 = (tmp >> 48) | (tmp << (64 - 48));
+ b13 = ror64(tmp, 48);
b6 -= b13;

tmp = b11 ^ b2;
- b11 = (tmp >> 20) | (tmp << (64 - 20));
+ b11 = ror64(tmp, 20);
b2 -= b11;

tmp = b15 ^ b0;
- b15 = (tmp >> 5) | (tmp << (64 - 5));
+ b15 = ror64(tmp, 5);
b0 -= b15;

tmp = b9 ^ b10;
- b9 = (tmp >> 17) | (tmp << (64 - 17));
+ b9 = ror64(tmp, 17);
b10 -= b9;

tmp = b11 ^ b8;
- b11 = (tmp >> 59) | (tmp << (64 - 59));
+ b11 = ror64(tmp, 59);
b8 -= b11;

tmp = b13 ^ b14;
- b13 = (tmp >> 41) | (tmp << (64 - 41));
+ b13 = ror64(tmp, 41);
b14 -= b13;

tmp = b15 ^ b12;
- b15 = (tmp >> 34) | (tmp << (64 - 34));
+ b15 = ror64(tmp, 34);
b12 -= b15;

tmp = b1 ^ b6;
- b1 = (tmp >> 13) | (tmp << (64 - 13));
+ b1 = ror64(tmp, 13);
b6 -= b1;

tmp = b3 ^ b4;
- b3 = (tmp >> 51) | (tmp << (64 - 51));
+ b3 = ror64(tmp, 51);
b4 -= b3;

tmp = b5 ^ b2;
- b5 = (tmp >> 4) | (tmp << (64 - 4));
+ b5 = ror64(tmp, 4);
b2 -= b5;

tmp = b7 ^ b0;
- b7 = (tmp >> 33) | (tmp << (64 - 33));
+ b7 = ror64(tmp, 33);
b0 -= b7;

tmp = b1 ^ b8;
- b1 = (tmp >> 52) | (tmp << (64 - 52));
+ b1 = ror64(tmp, 52);
b8 -= b1;

tmp = b5 ^ b14;
- b5 = (tmp >> 23) | (tmp << (64 - 23));
+ b5 = ror64(tmp, 23);
b14 -= b5;

tmp = b3 ^ b12;
- b3 = (tmp >> 18) | (tmp << (64 - 18));
+ b3 = ror64(tmp, 18);
b12 -= b3;

tmp = b7 ^ b10;
- b7 = (tmp >> 49) | (tmp << (64 - 49));
+ b7 = ror64(tmp, 49);
b10 -= b7;

tmp = b15 ^ b4;
- b15 = (tmp >> 55) | (tmp << (64 - 55));
+ b15 = ror64(tmp, 55);
b4 -= b15;

tmp = b11 ^ b6;
- b11 = (tmp >> 10) | (tmp << (64 - 10));
+ b11 = ror64(tmp, 10);
b6 -= b11;

tmp = b13 ^ b2;
- b13 = (tmp >> 19) | (tmp << (64 - 19));
+ b13 = ror64(tmp, 19);
b2 -= b13;

tmp = b9 ^ b0;
- b9 = (tmp >> 38) | (tmp << (64 - 38));
+ b9 = ror64(tmp, 38);
b0 -= b9;

tmp = b15 ^ b14;
- b15 = (tmp >> 37) | (tmp << (64 - 37));
+ b15 = ror64(tmp, 37);
b14 -= b15 + k14 + t1;
b15 -= k15;

tmp = b13 ^ b12;
- b13 = (tmp >> 22) | (tmp << (64 - 22));
+ b13 = ror64(tmp, 22);
b12 -= b13 + k12;
b13 -= k13 + t0;

tmp = b11 ^ b10;
- b11 = (tmp >> 17) | (tmp << (64 - 17));
+ b11 = ror64(tmp, 17);
b10 -= b11 + k10;
b11 -= k11;

tmp = b9 ^ b8;
- b9 = (tmp >> 8) | (tmp << (64 - 8));
+ b9 = ror64(tmp, 8);
b8 -= b9 + k8;
b9 -= k9;

tmp = b7 ^ b6;
- b7 = (tmp >> 47) | (tmp << (64 - 47));
+ b7 = ror64(tmp, 47);
b6 -= b7 + k6;
b7 -= k7;

tmp = b5 ^ b4;
- b5 = (tmp >> 8) | (tmp << (64 - 8));
+ b5 = ror64(tmp, 8);
b4 -= b5 + k4;
b5 -= k5;

tmp = b3 ^ b2;
- b3 = (tmp >> 13) | (tmp << (64 - 13));
+ b3 = ror64(tmp, 13);
b2 -= b3 + k2;
b3 -= k3;

tmp = b1 ^ b0;
- b1 = (tmp >> 24) | (tmp << (64 - 24));
+ b1 = ror64(tmp, 24);
b0 -= b1 + k0;
b1 -= k1;

--
2.6.3.368.gf34be46