diff --git a/drivers/char/random.c b/drivers/char/random.c
index 40107f8b9e9e..95f334a824ba 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -7,7 +7,7 @@
  * This driver produces cryptographically secure pseudorandom data. It is divided
  * into roughly six sections, each with a section header:
  *
- * - Initialization and readiness waiting.
+ * - Initialization and readiness waiting.
  * - Fast key erasure RNG, the "crng".
  * - Entropy accumulation and extraction routines.
  * - Entropy collection routines.
@@ -60,6 +60,7 @@
 #include
 #include
+
 /*********************************************************************
  *
  * Initialization and readiness waiting.
@@ -95,6 +96,517 @@ static int ratelimit_disable __read_mostly;
 module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
 MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// CONSTANTS
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#define KEYPOOL_SIZE 1024
+// BLOCK_SIZE is 256,
+// but maybe 128 is better for what we are doing.
+#define POOL_SIZE (BLOCK_SIZE * 4)
+#define POOL_SIZE_BITS (BLOCK_SIZE * 8)
+#define OUTPUT_POOL_SHIFT 10
+#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5))
+#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5))
+
+// Global runtime entropy
+uint8_t runtime_entropy[POOL_SIZE] __latent_entropy;
+
+// All primes less than 128, used as jump-table strides.
+// We probably don't need primes up to 1024 because our pool isn't that large,
+// but this is debatable.
+const int keypool_primes[] = {3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127};//, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013, 1019, 1021};
+
+static ssize_t extract_crng_user(uint8_t *__user_buf, size_t nbytes);
+static void crng_reseed(uint8_t *crng_pool, size_t nbytes);
+void _unique_iid(u64 uu_key[], u64 gatekey, size_t nbytes, int rotate);
+u64 _alternate_rand(void);
+
+// By the pigeonhole principle, no two callers can have the same gatekey:
+// if you have the same caller address and the same jiffies, then you are the same source.
+// Any construction for a gatekey is valid so long as the principle above holds,
+// because the gatekey substitutes for a mutex.
+#ifndef __make_gatekey
+	#define __make_gatekey(new_key)((u64)jiffies ^ (u64)_RET_IP_ ^ (u64)new_key ^ (u64)_THIS_IP_) // ^ _alternate_rand()
+	//todo: 32bit?
+	//#else
+	//#define _gatekey(new_key)((u32)_RET_IP_ << 32 | ((u32)&new_key ^ (u32)_THIS_IP_))|((u32)_THIS_IP_ << 32 | (u32)&new_key);
+	//#endif
+#endif
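+
+// Illustrative sketch (reviewer note, not kernel code): a userspace model of
+// the gatekey claim above. Concurrent callers differ in call site, destination
+// address, or time, so their gatekeys differ. The name make_gatekey_demo() is
+// hypothetical and merely stands in for __make_gatekey().
+/*
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+
+static uint64_t make_gatekey_demo(const void *caller_ip, const void *dest)
+{
+	// time ^ call-site address ^ destination address, as in __make_gatekey()
+	return (uint64_t)time(NULL) ^ (uint64_t)(uintptr_t)caller_ip ^ (uint64_t)(uintptr_t)dest;
+}
+
+int main(void)
+{
+	int site_a, site_b, a, b;
+	// Two "callers" with distinct destinations get distinct keys,
+	// even within the same clock tick.
+	printf("%016llx\n", (unsigned long long)make_gatekey_demo(&site_a, &a));
+	printf("%016llx\n", (unsigned long long)make_gatekey_demo(&site_b, &b));
+	return 0;
+}
+*/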
+
+// Rotate bits left.
+// Bits are not lost, so there is no loss of uniqueness.
+uint64_t rotl64(uint64_t x, int8_t r)
+{
+	r &= 63;
+	// A rotate by zero must be a no-op; "x >> 64" would be undefined behavior.
+	if (r == 0)
+		return x;
+	return (x << r) | (x >> (64 - r));
+}
+
+
+/* Add two buffers to compound uncertainty
+ *
+ * _add_unique() will spray bytes across the pool evenly to create a field of possibilities.
+ * With each jump, more uncertainty is introduced to this field.
+ * With this shuffling strategy an attacker is forced to work harder,
+ * and it is O(n) to copy bytes using a jump table or with a linear copy.
+ *
+ * This shuffling strategy was built to support the volume of writes created by handle_irq_event_percpu().
+ * There is no lock over the keypool, and all writes are done via atomic XOR operations.
+ * Even if a write were lost due to a race condition, it would be difficult to determine what was kept and what wasn't.
+ * Any effect of a race condition would make it even harder to reconstruct the keypool state.
+ *
+ */
+void _add_unique(uint8_t keypool[], int keypool_size, uint64_t gatekey, uint8_t unique[], int unique_size, int nbytes)
+{
+	int step;
+	// Start at the first byte that is read by _get_unique(), which reads in 64-bit words.
+	int next_jump = (gatekey % (keypool_size / 8)) * 8;
+	// Copy bytes with a jump table - O(n)
+	for(step = 0; step < nbytes; step++)
+	{
+		// Check if there is something to add.
+		if(unique[step] != 0){
+			// Every byte within keypool_size can be written to at the same time without losing a write.
+			keypool[next_jump] ^= unique[step];
+			// Derive the next jump address from the pool state we just touched.
+			next_jump ^= keypool[next_jump];
+			// Circular buffer
+			next_jump = next_jump % keypool_size;
+		}
+	}
+	// Leave no trace
+	gatekey = 0;
+	next_jump = 0;
+}
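+
+// Illustrative sketch (reviewer note, not kernel code): a userspace harness
+// showing how the jump-table mixer above is driven. demo_add_unique() is a
+// hypothetical stand-in that mirrors the loop shape of _add_unique().
+/*
+#include <stdint.h>
+#include <stdio.h>
+
+#define DEMO_POOL 1024
+
+static void demo_add_unique(uint8_t *pool, int pool_size, uint64_t gatekey,
+			    const uint8_t *in, int nbytes)
+{
+	int next = (int)((gatekey % (pool_size / 8)) * 8);
+	for (int i = 0; i < nbytes; i++) {
+		if (!in[i])
+			continue;		// nothing to add
+		pool[next] ^= in[i];		// lockless XOR write
+		next ^= pool[next];		// derive the next jump from pool state
+		next = next % pool_size;	// circular buffer
+	}
+}
+
+int main(void)
+{
+	uint8_t pool[DEMO_POOL] = {0};
+	const uint8_t sample[] = "interrupt timing bytes";
+	int touched = 0;
+	demo_add_unique(pool, DEMO_POOL, 0xdeadbeefULL, sample, sizeof(sample));
+	// Count touched bytes to see the spray pattern.
+	for (int i = 0; i < DEMO_POOL; i++)
+		touched += !!pool[i];
+	printf("bytes touched: %d\n", touched);
+	return 0;
+}
+*/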
+
+/*
+ * The goal of any _get_unique_csprng* is to produce an unpredictable I.I.D. stream.
+ * _get_unique() is meant to be as difficult to predict as possible,
+ * but it is not fully I.I.D. - and it doesn't need to be.
+ *
+*/
+// Todo: this works but needs to be ported to AES-NI.
+// AES-NI massively outperforms chacha20 in an `openssl speed` test.
+// Also, AES-NI is unlikely to have the kind of backdoor that would undermine this construction.
+// Paranoid users can configure the kernel to use pure software to avoid any ideological battles.
+// Handset makers will obviously prefer AES-NI because it will provide better battery life.
+/*
+void _get_unique_csprng_aes_ni(u8 uu_key[], u64 gatekey, size_t nbytes, int rotate)
+{
+	struct AES_ctx ctx;
+	uint8_t aes_key_material[BLOCK_SIZE * 3] __latent_entropy;
+	uint8_t *aes_key = aes_key_material;
+	uint8_t *aes_iv = aes_key_material + BLOCK_SIZE;
+	uint8_t *aes_block = aes_key_material + BLOCK_SIZE * 2;
+	uint64_t *aes_block_rotate = (uint64_t *)aes_block;
+	uint64_t *jump_rotate = (uint64_t *) &runtime_entropy;
+	size_t jump_rotate_size = KEYPOOL_SIZE / 8;
+	size_t amount_left = nbytes;
+	size_t chunk = 0;
+	// Get a new key, iv and preimage from the entropy pool:
+	_get_unique(runtime_entropy, KEYPOOL_SIZE, gatekey, aes_key_material, sizeof(aes_key_material));
+	// Cover our tracks.
+	// Make sure this gatekey + entry location can never be reused:
+	// No two accessors can generate the same gatekey, so this is threadsafe.
+	_add_unique(runtime_entropy, POOL_SIZE, gatekey, (uint8_t *)aes_block_rotate, sizeof(gatekey), sizeof(gatekey));
+	// Pull one block at a time out of the ring function.
+	while( amount_left > 0 )
+	{
+		// Account for sizes that are not evenly divisible by BLOCK_SIZE.
+		chunk = __min(amount_left, BLOCK_SIZE);
+		// Populate our cipher struct
+		AES_init_ctx_iv(&ctx, aes_key, aes_iv);
+		// Encrypt one block with AES-CBC-128:
+		AES_CBC_encrypt_buffer(&ctx, aes_block, BLOCK_SIZE);
+		// Copy this chunk to the user:
+		memcpy(uu_key, aes_block, chunk);
+		amount_left -= chunk;
+		if(amount_left > 0)
+		{
+			// move our copy destination
+			uu_key += chunk;
+			if(rotate)
+			{
+				// Rotate the key material with the output so that similar keys are never reused:
+				_add_unique(aes_key_material, BLOCK_SIZE*3, gatekey, aes_block, BLOCK_SIZE, BLOCK_SIZE);
+			}
+			// The ciphertext from the previous call to aes() is the plaintext for the next invocation.
+		}
+	}
+	// Cleanup the secrets used
+	memzero_explicit(&aes_key_material, BLOCK_SIZE*3);
+	gatekey ^= gatekey;
+}
+*/
+
+/*
+ * Obtain uniqueness from the keypool.
+ *
+ * A lock isn't needed because no two threads will be able to follow the same path.
+ * We assume this holds true due to the pigeonhole principle behind the gatekey generation.
+ *
+ * This method is linear, O(n), and we want to force our attacker into exponential work.
+ * KEYPOOL_SIZE * 8 bits gives the possible entry points (1024*8).
+ * We have four combinations of these; (1024*8)^4
+ *  - making a total of 2^52 possible combinations for any given keypool.
+ *
+ * The gatekey and the state of the keypool are used to derive 4 distinct jump points.
+ * It is like taking two MRI scans of a sand castle, then putting them in a XOR kaleidoscope.
+ *
+ * Constraints:
+ *  Each of the four layers must be unique, to prevent a^a=0.
+ *  Make sure our jump path to choose layers is distinct from other parallel invocations.
+ *  To prevent future repeats of a jump path we overwrite our jump index.
+ *
+ */
+void _get_unique(uint8_t *keypool, int keypool_size, u64 gatekey, uint8_t *unique, size_t nbytes)
+{
+	size_t step;
+	uint64_t *keyspace = (uint64_t *) keypool;
+	uint64_t *product = (uint64_t *) unique;
+	// We extract 64 bits at a time for performance.
+	int64_t keypool_size_64 = keypool_size / 8;
+	uint8_t gate_position = (uint8_t)(gatekey % keypool_size_64);
+	uint8_t jump_offset;
+	// We need to seed the process with our first jump location.
+	product[0] ^= gatekey;
+	// A prime is used to maximize the number of reads without repeat.
+	jump_offset = keypool_primes[product[1] % ARRAY_SIZE(keypool_primes)];
+	// Pull 64 bits at a time out of the ring function.
+	for(step = 0; step < nbytes/8; step++)
+	{
+		// Pull the next 64 bits from the entropy source:
+		product[step] ^= keyspace[gate_position];
+		// A shift rotate will make our reads less predictable without losing uniqueness.
+		// Here we rotate by an uncertain degree, making our local state more unique.
+		product[step] = rotl64(product[step], unique[step]%64);
+		// Pick another 64-bit chunk that is somewhere else in the pool and doesn't overlap.
+		gate_position = (gate_position + jump_offset) % keypool_size_64;
+		product[step] ^= keyspace[gate_position];
+		// Assume that 'keyspace' is shared, so we add a local rotation.
+		product[step] = rotl64(product[step], unique[step+1]%64);
+		// Find another point to read from that is distinct.
+		gate_position = (gate_position + jump_offset) % keypool_size_64;
+	}
+}
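+
+// Illustrative sketch (reviewer note, not kernel code): a quick userspace
+// check of the prime-stride claim above. A prime jump_offset is coprime to
+// the 128-word ring (KEYPOOL_SIZE / 8), so the read position visits every
+// word before any repeat.
+/*
+#include <stdio.h>
+
+int main(void)
+{
+	const int ring = 128;	// KEYPOOL_SIZE / 8 words
+	const int jump = 31;	// any prime from keypool_primes[]
+	int seen[128] = {0}, pos = 0, steps = 0, covered = 0;
+	do {
+		seen[pos] = 1;
+		pos = (pos + jump) % ring;
+		steps++;
+	} while (pos != 0);
+	for (int i = 0; i < ring; i++)
+		covered += seen[i];
+	printf("steps=%d covered=%d of %d\n", steps, covered, ring);
+	return 0;
+}
+*/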
+
+/*
+ * This generates a ChaCha block using the provided key, and then
+ * immediately overwrites that key with half the block. It returns
+ * the resultant ChaCha state to the user, along with the second
+ * half of the block containing 32 bytes of random data that may
+ * be used; random_data_len may not be greater than 32.
+ */
+static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
+				  u32 chacha_state[CHACHA_STATE_WORDS],
+				  u8 *random_data, size_t random_data_len)
+{
+	u8 first_block[CHACHA_BLOCK_SIZE];
+
+	BUG_ON(random_data_len > 32);
+
+	chacha_init_consts(chacha_state);
+	memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE);
+	memset(&chacha_state[12], 0, sizeof(u32) * 4);
+	chacha20_block(chacha_state, first_block);
+
+	memcpy(key, first_block, CHACHA_KEY_SIZE);
+	memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len);
+	memzero_explicit(first_block, sizeof(first_block));
+}
+
+/*
+ * The goal is to produce a very secure source of I.I.D. data
+ * (independent and identically distributed).
+ * This is a wrapper to dispatch to whatever primitive is best.
+ */
+void _unique_iid(u64 uu_key[], u64 gatekey, size_t nbytes, int rotate)
+{
+	// The state just needs to be distinct here - the key is the important bit.
+	u32 chacha_state[CHACHA_STATE_WORDS] __latent_entropy;
+	// We could make secret_key into __latent_entropy, but I would prefer speed - the keypool should be enough.
+	u8 secret_key[CHACHA_KEY_SIZE];
+	u8 *out = (u8 *)uu_key;
+	size_t chunk;
+	// Let's get ourselves a key from the keypool, using the caller's gatekey:
+	_get_unique(runtime_entropy, KEYPOOL_SIZE, gatekey, secret_key, sizeof(secret_key));
+	// Pass that key to a primitive which can provide an I.I.D. stream.
+	// crng_fast_key_erasure() emits at most 32 bytes per call and re-keys itself,
+	// so larger requests are filled one 32-byte block at a time.
+	while (nbytes) {
+		chunk = min_t(size_t, nbytes, 32);
+		crng_fast_key_erasure(secret_key, chacha_state, out, chunk);
+		out += chunk;
+		nbytes -= chunk;
+	}
+}
+
+/*
+ * The goal here is to be fast.
+ * The user needs less than one block; they only need two words.
+ * Let's fill the request as quickly as we can.
+ * The __latent_entropy on _unique_iid()'s chacha state helps because we may be
+ * called early in execution; it is good to have all the sources we can get.
+ */
+u64 get_random_u64(void)
+{
+	u64 anvil;
+	_unique_iid((u64 *)&anvil, __make_gatekey(&anvil), sizeof(anvil), 0);
+	return anvil;
+}
+EXPORT_SYMBOL(get_random_u64);
+
+/*
+ * We want to return just one word as quickly as possible.
+ * There is no use in running a 128- or 256-bit cipher for 32 bits.
+ * __make_gatekey() is plenty unique for this purpose.
+ * get_random_u32() is for kernel-land consumers.
+ */
+u32 get_random_u32(void)
+{
+	u64 anvil;
+	_unique_iid(&anvil, __make_gatekey(&anvil), sizeof(anvil), 0);
+	return (u32)anvil;
+}
+EXPORT_SYMBOL(get_random_u32);
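+
+// Illustrative sketch (reviewer note, not kernel code): the fast-key-erasure
+// pattern used above, with a toy permutation standing in for chacha20_block().
+// splitmix64 is NOT cryptographic; it only shows the key-replacement flow.
+/*
+#include <stdint.h>
+#include <stdio.h>
+
+static uint64_t toy_block(uint64_t x)	// stand-in permutation
+{
+	x += 0x9e3779b97f4a7c15ULL;
+	x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
+	x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
+	return x ^ (x >> 31);
+}
+
+// Each call emits output AND replaces the key, so capturing today's key
+// never exposes output that was already generated (forward secrecy).
+static uint64_t toy_fast_key_erasure(uint64_t *key)
+{
+	uint64_t next_key = toy_block(*key ^ 1);	// first half of the "block"
+	uint64_t output   = toy_block(*key ^ 2);	// second half, for the caller
+	*key = next_key;				// erase the old key
+	return output;
+}
+
+int main(void)
+{
+	uint64_t key = 0x1234abcdULL;
+	for (int i = 0; i < 4; i++)
+		printf("out[%d] = %016llx\n", i, (unsigned long long)toy_fast_key_erasure(&key));
+	return 0;
+}
+*/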
+
+/*
+ * There are many times when we need another opinion.
+ * Ideally that would come from another source, such as arch_get_random_seed_long().
+ * When we don't have an arch_get_random_seed_long(), we'll use ourselves as a source.
+ *
+ * Failure is not an option - and this output is untrusted.
+ * The output should be XOR'ed with a random value from a different source.
+ */
+u64 _alternate_rand(void)
+{
+	// Need a source that isn't GCC's __latent_entropy or time.
+	u64 anvil = 0;
+	// Try every source we know of, taken from random.c:
+	if(!arch_get_random_seed_long((unsigned long *)&anvil))
+	{
+		if(!arch_get_random_long((unsigned long *)&anvil))
+		{
+			anvil = random_get_entropy();
+		}
+	}
+	// anvil might still be zero -
+	// we can't tell the difference between a zero roll and a hardware error code.
+	// Worst case, we are missing everything above.
+	if(anvil == 0)
+	{
+		// We cannot fail; in this case we pull from the pool.
+		// This output is used to make a gatekey, so time is used.
+		// No two calls can use the exact same jiffies + &anvil, due to the pigeonhole principle.
+		// todo: 32bit?
+		u64 alternate_gatekey __latent_entropy;
+		alternate_gatekey ^= (u64)jiffies ^ (u64)&anvil;
+		_unique_iid(&anvil, alternate_gatekey, sizeof(anvil), 0);
+		// 'anvil' is a small jump-table entropy pool that we can further enrich.
+		_add_unique((uint8_t *)&anvil, sizeof(anvil), alternate_gatekey, (uint8_t *)&alternate_gatekey, sizeof(alternate_gatekey), sizeof(alternate_gatekey));
+		// cleanup
+		alternate_gatekey = 0;
+	}
+	return anvil;
+}
+
+/*
+ * Public function to provide CRNG output.
+ *
+ *  - Generate some very hard to guess key material
+ *  - Use the fastest cryptographic primitive available
+ *  - Return CRNG output back to the user as quickly as we can
+ *  - Clean up so we can do this all over again
+ *
+ * This is where users get their entropy from the random.c
+ * device driver (i.e. reading /dev/random).
+ */
+static ssize_t extract_crng_user(uint8_t *__user_buf, size_t nbytes){
+	// Nothing to do for an empty request.
+	if(nbytes == 0){
+		return nbytes;
+	} else {
+		// Fill the request - no rotate
+		_unique_iid((u64 *)__user_buf, __make_gatekey(__user_buf), nbytes, 0);
+	}
+	// At this point it should not be possible to re-create any part of the PRNG stream used.
+	return nbytes;
+}
+
+// This is the /dev/urandom variant.
+// It is similar to the algorithm above, but more time is spent procuring stronger key material.
+// The user is willing to wait, so we'll do our very best.
+// When this method completes, the keypool as a whole is better off, as it will be re-scheduled.
+/*
+ * Be an _unlimited_ random source
+ *  Speed is not an issue
+ *  Provide the very best source possible
+ *
+ * Rolling accumulator keys:
+ *  Key, IV, and image accumulate entropy with each operation.
+ *  They are never overwritten, only XOR'ed with the previous value.
+ */
+static ssize_t extract_crng_user_unlimited(uint8_t *__user_buf, size_t nbytes)
+{
+	// Nothing to do for an empty request.
+	if(nbytes == 0){
+		return nbytes;
+	} else {
+		// Fill the request - rotate key material:
+		_unique_iid((u64 *)__user_buf, __make_gatekey(__user_buf), nbytes, 1);
+	}
+	// At this point it should not be possible to re-create any part of the PRNG stream used.
+	return nbytes;
+}
+
+static unsigned long get_reg(u16 *reg_idx, struct pt_regs *regs)
+{
+	unsigned long *ptr = (unsigned long *)regs;
+	unsigned int idx;
+
+	if (regs == NULL)
+		return 0;
+	idx = READ_ONCE(*reg_idx);
+	if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long))
+		idx = 0;
+	ptr += idx++;
+	WRITE_ONCE(*reg_idx, idx);
+	return *ptr;
+}
+
+/* This function is in fact called more times than I have ever used a phone.
+ * Let's keep this function as light as possible, and move more weight to extract_crng_user().
+ * If we need to add more computation, then the user requesting the PRNG should pay the price;
+ * any logic added here means the entire system pays a price.
+ * Choose your operations wisely.
+ *
+ * fast_mix() is fast in name only - mixing can also be handled with encryption.
+ *
+ */
+// If there is one function to make lockless, this is the one.
+void add_interrupt_randomness(int irq)
+{
+	// Globally unique gatekey
+	uint64_t gatekey __latent_entropy;
+	u64 temp_pool[5] __latent_entropy;
+	struct pt_regs *regs = get_irq_regs();
+	// todo: whoops, irq_flags was removed from this branch...
+	// Personally I liked adding flags because we should collect as much uniqueness as we can.
+
+	// irq_flags contains a few bits, and every bit counts.
+	//cycles_t cycles = irq_flags;
+	__u32 c_high = 0, j_high;	// c_high stays zero while the cycles path below is disabled
+	__u64 ip = _RET_IP_;
+
+	// todo: whoops, we don't use 'cycles' anymore in this branch; is there a better source of uniqueness?
+	// This code is adapted from the old random.c - all O(1) operations.
+	// The interrupt + time gives us 4 bytes.
+	//if (cycles == 0)
+	//	cycles = get_reg(fast_pool, regs);
+	//c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
+	j_high = (sizeof(jiffies) > 4) ? jiffies >> 32 : 0;
+	//fast_pool[0] ^= cycles ^ j_high ^ irq;
+	temp_pool[1] ^= jiffies ^ c_high;
+	temp_pool[2] ^= ip;
+	temp_pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
+		get_reg((u16 *)temp_pool, regs);
+
+	// A gatekey will have some hardware randomness when available.
+	// It will be XOR'ed with __latent_entropy to prevent outsider control.
+	gatekey ^= __make_gatekey(&irq);
+	// Add this unique value to the pool.
+	temp_pool[4] ^= gatekey;
+	// A single O(1) XOR pass is the best we can get to drip the entropy back into the pool.
+	_add_unique(runtime_entropy, POOL_SIZE, gatekey, (uint8_t *)temp_pool, sizeof(temp_pool), sizeof(temp_pool));
+
+	// Cleanup
+	gatekey = 0;
+}
+EXPORT_SYMBOL_GPL(add_interrupt_randomness);
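+
+// Illustrative sketch (reviewer note, not kernel code): a userspace model of
+// the per-interrupt packing above - a handful of XOR word writes into a small
+// stack pool, then one O(1) pass of the jump-table mixer. The constants are
+// made-up stand-ins for jiffies, _RET_IP_ and the irq number.
+/*
+#include <stdint.h>
+#include <stdio.h>
+
+int main(void)
+{
+	uint64_t temp_pool[5] = {0};
+	uint64_t jiffies_now = 123456789ULL;
+	uint64_t ip = 0xffffffff81000000ULL;
+	int irq = 29;
+
+	temp_pool[1] ^= jiffies_now;					// timestamp
+	temp_pool[2] ^= ip;						// instruction pointer
+	temp_pool[3] ^= ip >> 32;					// high half of the IP
+	temp_pool[4] ^= jiffies_now ^ (uint64_t)(uintptr_t)&temp_pool ^ (uint64_t)irq; // gatekey-style mix
+
+	for (int i = 0; i < 5; i++)
+		printf("temp_pool[%d] = %016llx\n", i, (unsigned long long)temp_pool[i]);
+	return 0;
+}
+*/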
+
+/*
+ * Getting entropy on a fresh system is a hard thing to do.
+ * So we will start with __latent_entropy; although it isn't required, it doesn't hurt.
+ * Then let's take addresses we know about and add them to the mix.
+ * todo: Fire up the debugger, and look for regions of memory with good data.
+ * The zero page has hardware identifiers that can be hard to guess.
+ * Then derive a key the best we can given the degraded state of the pool.
+ *
+ * find_more_entropy_in_memory() is called when extract_crng_user() can't be used.
+ * get_random_u32() and get_random_u64() can't be used either.
+ *
+ */
+static void find_more_entropy_in_memory(uint8_t *crng_pool, int nbytes_needed)
+{
+	uint8_t *anvil;
+	// This is early in boot; __latent_entropy is helpful.
+	u64 gatekey __latent_entropy;
+	int index;
+	// Pull unique hardware information populated by the BIOS.
+	int ZERO_PAGE = 0;
+
+	gatekey ^= __make_gatekey(&anvil);
+
+	// A place to forge some entropy - the heap has a unique offset.
+	anvil = (uint8_t *)kmalloc(nbytes_needed, __GFP_HIGH);
+
+	// Let's add as many easily accessible unknowns as we can:
+	// Even without ASLR some addresses can be more difficult to guess than others.
+	// With ASLR, this would be partially feedback noise, with offsets.
+	// Add any addresses that are unknown, up to POOL_SIZE.
+	// 16 addresses is ideal for 64-bit; 32-bit should use 32 addresses to make 1024 bits.
+	// Todo: use a debugger to find the 32 hardest-to-guess addresses.
+	void *points_of_interest[] = {
+		(void *)(uintptr_t)ZERO_PAGE,
+		(void *)_RET_IP_,
+		(void *)_THIS_IP_,
+		anvil,
+		(void *)(uintptr_t)gatekey
+		// todo... add to this list.
+		// Maybe pull in from the bss, data and text memory segments?
+	};
+
+	// Gather runtime entropy:
+	//  - Data from the zero page
+	//  - Memory addresses from the stack and the heap ('anvil' points to the heap)
+	//  - Unset memory on the heap that may contain noise
+	//  - Unallocated memory that may have been used or may be in use
+	// Copy from the zero page; it contains HW IDs from the BIOS.
+	for(index = 0; index < ARRAY_SIZE(points_of_interest); index++){
+		void *readPoint = points_of_interest[index];
+		// Grab the uniqueness found in this address value:
+		_add_unique(crng_pool, POOL_SIZE, gatekey, (uint8_t *)&readPoint, sizeof(readPoint), sizeof(readPoint));
+		// Pull in uniqueness from this page in memory:
+		// Todo - read the values at this address - we want the contents of the zero page:
+		//_add_unique(crng_pool, POOL_SIZE, gatekey, readPoint, nbytes_needed, nbytes_needed);
+	}
+
+	// Twigs, when wrapped together, can become load-bearing;
+	// a samurai sword has many layers of steel.
+	// I.I.D. means independent and identically distributed.
+	// _unique_iid() might not be safe at this point,
+	//  - but it is unique enough as a seed.
+	_unique_iid((u64 *)anvil, gatekey, nbytes_needed, 1);
+	_add_unique(crng_pool, POOL_SIZE, gatekey, anvil, nbytes_needed, nbytes_needed);
+
+	// Clean up our tracks so another process cannot see our source material.
+	memzero_explicit(anvil, nbytes_needed);
+	gatekey = 0;
+	kfree(anvil);
+}
+
+static ssize_t
+_random_read(int nonblock, char __user *buf, size_t nbytes)
+{
+	return extract_crng_user(buf, nbytes);
+}
+
+// no blocking
+static ssize_t
+random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+{
+	return extract_crng_user(buf+*ppos, nbytes-*ppos);
+}
+
+static ssize_t
+urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+{
+	// This is a non-blocking device, so we are not going to wait for the pool to fill.
+	// We will respect the user's wishes and spend the time to produce the best output.
+	return extract_crng_user_unlimited(buf+*ppos, nbytes-*ppos);
+}
+
 /*
  * Returns whether or not the input pool has been seeded and thus guaranteed
  * to supply cryptographically secure random numbers. This applies to: the
@@ -110,30 +622,19 @@ bool rng_is_initialized(void)
 }
 EXPORT_SYMBOL(rng_is_initialized);
 
-/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */
-static void try_to_generate_entropy(void);
-
 /*
- * Wait for the input pool to be seeded and thus guaranteed to supply
- * cryptographically secure random numbers. This applies to: the /dev/urandom
- * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
- * family of functions. Using any of these functions without first calling
- * this function forfeits the guarantee of security.
- *
- * Returns: 0 if the input pool has been seeded.
- *          -ERESTARTSYS if the function was interrupted by a signal.
+ * This function is invoked when we are told that we need to find more entropy.
+ * It must be fast; we can't wait around, because that would slow down the entire show.
  */
 int wait_for_random_bytes(void)
 {
-	while (!crng_ready()) {
-		int ret;
-
-		try_to_generate_entropy();
-		ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
-		if (ret)
-			return ret > 0 ? 0 : ret;
-	}
-	return 0;
+	// People are waiting on us; there is no "try" - you need to go find what we need.
+	// Because we simply cannot afford to wait, we use __GFP_HIGH to access uniqueness in the heap,
+	// as well as other sources.
+	// Whoops, todo - this is a good idea but it's unstable and I need help here:
+	//find_more_entropy_in_memory(runtime_entropy, POOL_SIZE);
+	// todo: if find_more_entropy_in_memory() fails then we have bigger problems on our hands.
+	// 0 means "seeded"; callers treat a nonzero return as failure.
+	return 0;
 }
 EXPORT_SYMBOL(wait_for_random_bytes);
@@ -173,38 +674,6 @@ int unregister_random_ready_notifier(struct notifier_block *nb)
 	return ret;
 }
 
-static void process_random_ready_list(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&random_ready_chain_lock, flags);
-	raw_notifier_call_chain(&random_ready_chain, 0, NULL);
-	spin_unlock_irqrestore(&random_ready_chain_lock, flags);
-}
-
-#define warn_unseeded_randomness(previous) \
-	_warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
-
-static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous)
-{
-#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
-	const bool print_once = false;
-#else
-	static bool print_once __read_mostly;
-#endif
-
-	if (print_once || crng_ready() ||
-	    (previous && (caller == READ_ONCE(*previous))))
-		return;
-	WRITE_ONCE(*previous, caller);
-#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
-	print_once = true;
-#endif
-	if (__ratelimit(&unseeded_warning))
-		printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n",
-				func_name, caller, crng_init);
-}
-
 
 /*********************************************************************
  *
@@ -256,260 +725,6 @@ static DEFINE_PER_CPU(struct crng, crngs) = {
 	.lock = INIT_LOCAL_LOCK(crngs.lock),
 };
 
-/* Used by crng_reseed() to extract a new seed from the input pool. */
-static bool drain_entropy(void *buf, size_t nbytes, bool force);
-
-/*
- * This extracts a new crng key from the input pool, but only if there is a
- * sufficient amount of entropy available or force is true, in order to
- * mitigate bruteforcing of newly added bits.
- */
-static void crng_reseed(bool force)
-{
-	unsigned long flags;
-	unsigned long next_gen;
-	u8 key[CHACHA_KEY_SIZE];
-	bool finalize_init = false;
-
-	/* Only reseed if we can, to prevent brute forcing a small amount of new bits. */
-	if (!drain_entropy(key, sizeof(key), force))
-		return;
-
-	/*
-	 * We copy the new key into the base_crng, overwriting the old one,
-	 * and update the generation counter. We avoid hitting ULONG_MAX,
-	 * because the per-cpu crngs are initialized to ULONG_MAX, so this
-	 * forces new CPUs that come online to always initialize.
-	 */
-	spin_lock_irqsave(&base_crng.lock, flags);
-	memcpy(base_crng.key, key, sizeof(base_crng.key));
-	next_gen = base_crng.generation + 1;
-	if (next_gen == ULONG_MAX)
-		++next_gen;
-	WRITE_ONCE(base_crng.generation, next_gen);
-	WRITE_ONCE(base_crng.birth, jiffies);
-	if (!crng_ready()) {
-		crng_init = 2;
-		finalize_init = true;
-	}
-	spin_unlock_irqrestore(&base_crng.lock, flags);
-	memzero_explicit(key, sizeof(key));
-	if (finalize_init) {
-		process_random_ready_list();
-		wake_up_interruptible(&crng_init_wait);
-		kill_fasync(&fasync, SIGIO, POLL_IN);
-		pr_notice("crng init done\n");
-		if (unseeded_warning.missed) {
-			pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n",
-				  unseeded_warning.missed);
-			unseeded_warning.missed = 0;
-		}
-		if (urandom_warning.missed) {
-			pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
-				  urandom_warning.missed);
-			urandom_warning.missed = 0;
-		}
-	}
-}
-
-/*
- * This generates a ChaCha block using the provided key, and then
- * immediately overwites that key with half the block. It returns
- * the resultant ChaCha state to the user, along with the second
- * half of the block containing 32 bytes of random data that may
- * be used; random_data_len may not be greater than 32.
- */ -static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], - u32 chacha_state[CHACHA_STATE_WORDS], - u8 *random_data, size_t random_data_len) -{ - u8 first_block[CHACHA_BLOCK_SIZE]; - - BUG_ON(random_data_len > 32); - - chacha_init_consts(chacha_state); - memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); - memset(&chacha_state[12], 0, sizeof(u32) * 4); - chacha20_block(chacha_state, first_block); - - memcpy(key, first_block, CHACHA_KEY_SIZE); - memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); - memzero_explicit(first_block, sizeof(first_block)); -} - -/* - * Return whether the crng seed is considered to be sufficiently - * old that a reseeding might be attempted. This happens if the last - * reseeding was CRNG_RESEED_INTERVAL ago, or during early boot, at - * an interval proportional to the uptime. - */ -static bool crng_has_old_seed(void) -{ - static bool early_boot = true; - unsigned long interval = CRNG_RESEED_INTERVAL; - - if (unlikely(READ_ONCE(early_boot))) { - time64_t uptime = ktime_get_seconds(); - if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) - WRITE_ONCE(early_boot, false); - else - interval = max_t(unsigned int, 5 * HZ, - (unsigned int)uptime / 2 * HZ); - } - return time_after(jiffies, READ_ONCE(base_crng.birth) + interval); -} - -/* - * This function returns a ChaCha state that you may use for generating - * random data. It also returns up to 32 bytes on its own of random data - * that may be used; random_data_len may not be greater than 32. - */ -static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], - u8 *random_data, size_t random_data_len) -{ - unsigned long flags; - struct crng *crng; - - BUG_ON(random_data_len > 32); - - /* - * For the fast path, we check whether we're ready, unlocked first, and - * then re-check once locked later. In the case where we're really not - * ready, we do fast key erasure with the base_crng directly, because - * this is what crng_pre_init_inject() mutates during early init. - */ - if (!crng_ready()) { - bool ready; - - spin_lock_irqsave(&base_crng.lock, flags); - ready = crng_ready(); - if (!ready) - crng_fast_key_erasure(base_crng.key, chacha_state, - random_data, random_data_len); - spin_unlock_irqrestore(&base_crng.lock, flags); - if (!ready) - return; - } - - /* - * If the base_crng is old enough, we try to reseed, which in turn - * bumps the generation counter that we check below. - */ - if (unlikely(crng_has_old_seed())) - crng_reseed(false); - - local_lock_irqsave(&crngs.lock, flags); - crng = raw_cpu_ptr(&crngs); - - /* - * If our per-cpu crng is older than the base_crng, then it means - * somebody reseeded the base_crng. In that case, we do fast key - * erasure on the base_crng, and use its output as the new key - * for our per-cpu crng. This brings us up to date with base_crng. - */ - if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) { - spin_lock(&base_crng.lock); - crng_fast_key_erasure(base_crng.key, chacha_state, - crng->key, sizeof(crng->key)); - crng->generation = base_crng.generation; - spin_unlock(&base_crng.lock); - } - - /* - * Finally, when we've made it this far, our per-cpu crng has an up - * to date key, and we can do fast key erasure with it to produce - * some random data and a ChaCha state for the caller. All other - * branches of this function are "unlikely", so most of the time we - * should wind up here immediately. 
- */ - crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len); - local_unlock_irqrestore(&crngs.lock, flags); -} - -/* - * This function is for crng_init == 0 only. It loads entropy directly - * into the crng's key, without going through the input pool. It is, - * generally speaking, not very safe, but we use this only at early - * boot time when it's better to have something there rather than - * nothing. - * - * If account is set, then the crng_init_cnt counter is incremented. - * This shouldn't be set by functions like add_device_randomness(), - * where we can't trust the buffer passed to it is guaranteed to be - * unpredictable (so it might not have any entropy at all). - * - * Returns the number of bytes processed from input, which is bounded - * by CRNG_INIT_CNT_THRESH if account is true. - */ -static size_t crng_pre_init_inject(const void *input, size_t len, bool account) -{ - static int crng_init_cnt = 0; - struct blake2s_state hash; - unsigned long flags; - - blake2s_init(&hash, sizeof(base_crng.key)); - - spin_lock_irqsave(&base_crng.lock, flags); - if (crng_init != 0) { - spin_unlock_irqrestore(&base_crng.lock, flags); - return 0; - } - - if (account) - len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); - blake2s_update(&hash, input, len); - blake2s_final(&hash, base_crng.key); - - if (account) { - crng_init_cnt += len; - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - ++base_crng.generation; - crng_init = 1; - } - } - - spin_unlock_irqrestore(&base_crng.lock, flags); - - if (crng_init == 1) - pr_notice("fast init done\n"); - - return len; -} - -static void _get_random_bytes(void *buf, size_t nbytes) -{ - u32 chacha_state[CHACHA_STATE_WORDS]; - u8 tmp[CHACHA_BLOCK_SIZE]; - size_t len; - - if (!nbytes) - return; - - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, buf, len); - nbytes -= len; - buf += len; - - while (nbytes) { - if (nbytes < CHACHA_BLOCK_SIZE) { - chacha20_block(chacha_state, tmp); - memcpy(buf, tmp, nbytes); - memzero_explicit(tmp, sizeof(tmp)); - break; - } - - chacha20_block(chacha_state, buf); - if (unlikely(chacha_state[12] == 0)) - ++chacha_state[13]; - nbytes -= CHACHA_BLOCK_SIZE; - buf += CHACHA_BLOCK_SIZE; - } - - memzero_explicit(chacha_state, sizeof(chacha_state)); -} - /* * This function is the exported kernel interface. 
It returns some * number of good random numbers, suitable for key generation, seeding @@ -522,59 +737,10 @@ static void _get_random_bytes(void *buf, size_t nbytes) */ void get_random_bytes(void *buf, size_t nbytes) { - static void *previous; - - warn_unseeded_randomness(&previous); - _get_random_bytes(buf, nbytes); + extract_crng_user(buf, nbytes); } EXPORT_SYMBOL(get_random_bytes); -static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) -{ - bool large_request = nbytes > 256; - ssize_t ret = 0; - size_t len; - u32 chacha_state[CHACHA_STATE_WORDS]; - u8 output[CHACHA_BLOCK_SIZE]; - - if (!nbytes) - return 0; - - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, output, len); - - if (copy_to_user(buf, output, len)) - return -EFAULT; - nbytes -= len; - buf += len; - ret += len; - - while (nbytes) { - if (large_request && need_resched()) { - if (signal_pending(current)) - break; - schedule(); - } - - chacha20_block(chacha_state, output); - if (unlikely(chacha_state[12] == 0)) - ++chacha_state[13]; - - len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, output, len)) { - ret = -EFAULT; - break; - } - - nbytes -= len; - buf += len; - ret += len; - } - - memzero_explicit(chacha_state, sizeof(chacha_state)); - memzero_explicit(output, sizeof(output)); - return ret; -} /* * Batched entropy returns random integers. The quality of the random @@ -605,68 +771,12 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { .position = UINT_MAX }; -u64 get_random_u64(void) -{ - u64 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - unsigned long next_gen; - - warn_unseeded_randomness(&previous); - - local_lock_irqsave(&batched_entropy_u64.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u64); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); - batch->position = 0; - batch->generation = next_gen; - } - - ret = batch->entropy_u64[batch->position]; - batch->entropy_u64[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u64.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u64); static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), .position = UINT_MAX }; -u32 get_random_u32(void) -{ - u32 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - unsigned long next_gen; - - warn_unseeded_randomness(&previous); - - local_lock_irqsave(&batched_entropy_u32.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u32); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); - batch->position = 0; - batch->generation = next_gen; - } - - ret = batch->entropy_u32[batch->position]; - batch->entropy_u32[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u32.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u32); #ifdef CONFIG_SMP /* @@ -749,22 +859,8 @@ EXPORT_SYMBOL(get_random_bytes_arch); /********************************************************************** * - * Entropy accumulation and extraction routines. 
- *
- * Callers may add entropy via:
- *
- *     static void mix_pool_bytes(const void *in, size_t nbytes)
- *
- * After which, if added entropy should be credited:
- *
- *     static void credit_entropy_bits(size_t nbits)
- *
- * Finally, extract entropy via these two, with the latter one
- * setting the entropy count to zero and extracting only if there
- * is POOL_MIN_BITS entropy credited prior or force is true:
- *
- *     static void extract_entropy(void *buf, size_t nbytes)
- *     static bool drain_entropy(void *buf, size_t nbytes, bool force)
+ * Entropy cannot be extracted from this new key-scheduling primitive;
+ * this construction is purely additive. Enjoy.
  *
  **********************************************************************/
@@ -788,108 +884,6 @@ static struct {
 	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
 };
 
-static void _mix_pool_bytes(const void *in, size_t nbytes)
-{
-	blake2s_update(&input_pool.hash, in, nbytes);
-}
-
-/*
- * This function adds bytes into the entropy "pool". It does not
- * update the entropy estimate. The caller should call
- * credit_entropy_bits if this is appropriate.
- */
-static void mix_pool_bytes(const void *in, size_t nbytes)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&input_pool.lock, flags);
-	_mix_pool_bytes(in, nbytes);
-	spin_unlock_irqrestore(&input_pool.lock, flags);
-}
-
-static void credit_entropy_bits(size_t nbits)
-{
-	unsigned int entropy_count, orig, add;
-
-	if (!nbits)
-		return;
-
-	add = min_t(size_t, nbits, POOL_BITS);
-
-	do {
-		orig = READ_ONCE(input_pool.entropy_count);
-		entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
-	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
-
-	if (!crng_ready() && entropy_count >= POOL_MIN_BITS)
-		crng_reseed(false);
-}
-
-/*
- * This is an HKDF-like construction for using the hashed collected entropy
- * as a PRF key, that's then expanded block-by-block.
- */
-static void extract_entropy(void *buf, size_t nbytes)
-{
-	unsigned long flags;
-	u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
-	struct {
-		unsigned long rdseed[32 / sizeof(long)];
-		size_t counter;
-	} block;
-	size_t i;
-
-	for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) {
-		if (!arch_get_random_seed_long(&block.rdseed[i]) &&
-		    !arch_get_random_long(&block.rdseed[i]))
-			block.rdseed[i] = random_get_entropy();
-	}
-
-	spin_lock_irqsave(&input_pool.lock, flags);
-
-	/* seed = HASHPRF(last_key, entropy_input) */
-	blake2s_final(&input_pool.hash, seed);
-
-	/* next_key = HASHPRF(seed, RDSEED || 0) */
-	block.counter = 0;
-	blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
-	blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
-
-	spin_unlock_irqrestore(&input_pool.lock, flags);
-	memzero_explicit(next_key, sizeof(next_key));
-
-	while (nbytes) {
-		i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE);
-		/* output = HASHPRF(seed, RDSEED || ++counter) */
-		++block.counter;
-		blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
-		nbytes -= i;
-		buf += i;
-	}
-
-	memzero_explicit(seed, sizeof(seed));
-	memzero_explicit(&block, sizeof(block));
-}
-
-/*
- * First we make sure we have POOL_MIN_BITS of entropy in the pool unless force
- * is true, and then we set the entropy count to zero (but don't actually touch
- * any data). Only then can we extract a new key with extract_entropy().
- */
-static bool drain_entropy(void *buf, size_t nbytes, bool force)
-{
-	unsigned int entropy_count;
-	do {
-		entropy_count = READ_ONCE(input_pool.entropy_count);
-		if (!force && entropy_count < POOL_MIN_BITS)
-			return false;
-	} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
-	extract_entropy(buf, nbytes);
-	wake_up_interruptible(&random_write_wait);
-	kill_fasync(&fasync, SIGIO, POLL_OUT);
-	return true;
-}
-
 
 /**********************************************************************
  *
@@ -970,23 +964,11 @@ early_param("random.trust_bootloader", parse_trust_bootloader);
  */
 int __init rand_initialize(void)
 {
-	size_t i;
-	ktime_t now = ktime_get_real();
 	bool arch_init = true;
-	unsigned long rv;
 
-	for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) {
-		if (!arch_get_random_seed_long_early(&rv) &&
-		    !arch_get_random_long_early(&rv)) {
-			rv = random_get_entropy();
-			arch_init = false;
-		}
-		_mix_pool_bytes(&rv, sizeof(rv));
-	}
-	_mix_pool_bytes(&now, sizeof(now));
-	_mix_pool_bytes(utsname(), sizeof(*(utsname())));
+	crng_reseed(runtime_entropy, sizeof(runtime_entropy));
 
-	extract_entropy(base_crng.key, sizeof(base_crng.key));
+	extract_crng_user(base_crng.key, sizeof(base_crng.key));
 	++base_crng.generation;
 
 	if (arch_init && trust_cpu && !crng_ready()) {
@@ -1011,17 +993,10 @@ int __init rand_initialize(void)
  */
 void add_device_randomness(const void *buf, size_t size)
 {
-	cycles_t cycles = random_get_entropy();
-	unsigned long flags, now = jiffies;
-
-	if (crng_init == 0 && size)
-		crng_pre_init_inject(buf, size, false);
-
-	spin_lock_irqsave(&input_pool.lock, flags);
-	_mix_pool_bytes(&cycles, sizeof(cycles));
-	_mix_pool_bytes(&now, sizeof(now));
-	_mix_pool_bytes(buf, size);
-	spin_unlock_irqrestore(&input_pool.lock, flags);
+	// A unique gatekey made with the caller's unique buffer address.
+	uint64_t gatekey = __make_gatekey(buf);
+	// Locks are not needed because our gatekey cannot be shared with any other caller.
+	_add_unique(runtime_entropy, POOL_SIZE, gatekey, (uint8_t *)buf, size, size);
 }
 EXPORT_SYMBOL(add_device_randomness);
 
@@ -1031,86 +1006,37 @@ struct timer_rand_state {
 	long last_delta, last_delta2;
 };
 
-/*
- * This function adds entropy to the entropy "pool" by using timing
- * delays. It uses the timer_rand_state structure to make an estimate
- * of how many bits of entropy this call has added to the pool.
- *
- * The number "num" is also added to the pool - it should somehow describe
- * the type of event which just happened. This is currently 0-255 for
- * keyboard scan codes, and 256 upwards for interrupts.
- */
-static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
-{
-	cycles_t cycles = random_get_entropy();
-	unsigned long flags, now = jiffies;
-	long delta, delta2, delta3;
-
-	spin_lock_irqsave(&input_pool.lock, flags);
-	_mix_pool_bytes(&cycles, sizeof(cycles));
-	_mix_pool_bytes(&now, sizeof(now));
-	_mix_pool_bytes(&num, sizeof(num));
-	spin_unlock_irqrestore(&input_pool.lock, flags);
-
-	/*
-	 * Calculate number of bits of randomness we probably added.
-	 * We take into account the first, second and third-order deltas
-	 * in order to make our estimate.
-	 */
-	delta = now - READ_ONCE(state->last_time);
-	WRITE_ONCE(state->last_time, now);
-
-	delta2 = delta - READ_ONCE(state->last_delta);
-	WRITE_ONCE(state->last_delta, delta);
-
-	delta3 = delta2 - READ_ONCE(state->last_delta2);
-	WRITE_ONCE(state->last_delta2, delta2);
-
-	if (delta < 0)
-		delta = -delta;
-	if (delta2 < 0)
-		delta2 = -delta2;
-	if (delta3 < 0)
-		delta3 = -delta3;
-	if (delta > delta2)
-		delta = delta2;
-	if (delta > delta3)
-		delta = delta3;
-
-	/*
-	 * delta is now minimum absolute delta.
-	 * Round down by 1 bit on general principles,
-	 * and limit entropy estimate to 12 bits.
-	 */
-	credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11));
-}
-
+// todo: this could be better; this function is called a lot, so we need to improve performance here.
+// Ideally we would call _add_unique() once per invocation and call it a day.
 void add_input_randomness(unsigned int type, unsigned int code,
 			  unsigned int value)
 {
-	static unsigned char last_value;
-	static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES };
-
-	/* Ignore autorepeat and the like. */
-	if (value == last_value)
-		return;
-
-	last_value = value;
-	add_timer_randomness(&input_timer_state,
-			     (type << 4) ^ code ^ (code >> 4) ^ value);
+	// A unique gatekey made with the caller's uniqueness.
+	// Ideally we would use the caller's address; in this case we are pass-by-value,
+	// so this is not an ideal gatekey, but it will do the trick.
+	uint64_t gatekey = __make_gatekey(&type);
+	// Add the uniqueness of the code that we were passed.
+	_add_unique(runtime_entropy, POOL_SIZE, gatekey, (uint8_t *)&code, sizeof(code), sizeof(code));
+	// Then add the uniqueness of the value; bump the gatekey to write to a different spot.
+	// An overlap here isn't a big deal, because an attacker cannot guess what we are up to.
+	_add_unique(runtime_entropy, POOL_SIZE, gatekey+1, (uint8_t *)&value, sizeof(value), sizeof(value));
+	// Sure, why not - the type is also unique. Is this too much?
+	_add_unique(runtime_entropy, POOL_SIZE, gatekey+2, (uint8_t *)&type, sizeof(type), sizeof(type));
 }
 EXPORT_SYMBOL_GPL(add_input_randomness);
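+
+// Illustrative sketch (reviewer note, not kernel code): why gatekey, gatekey+1
+// and gatekey+2 above land in different places. With _add_unique()'s starting
+// byte at (gatekey % (POOL_SIZE / 8)) * 8, consecutive gatekeys begin 8 bytes
+// apart (assuming the 1024-byte pool used in this patch).
+/*
+#include <stdint.h>
+#include <stdio.h>
+
+int main(void)
+{
+	uint64_t gatekey = 0x123456789abcdef0ULL;
+	for (int k = 0; k < 3; k++)
+		printf("gatekey+%d starts at byte %llu\n", k,
+		       (unsigned long long)(((gatekey + k) % (1024 / 8)) * 8));
+	return 0;
+}
+*/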
 
 #ifdef CONFIG_BLOCK
 void add_disk_randomness(struct gendisk *disk)
 {
+	// Check if there is work to be done.
 	if (!disk || !disk->random)
 		return;
-	/* First major is 1, so we get >= 0x200 here. */
-	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
+	// Locks are not needed because our gatekey cannot be shared with any other caller.
+	_add_unique(runtime_entropy, POOL_SIZE, __make_gatekey(disk), (uint8_t *)disk->random, sizeof(*disk->random), sizeof(*disk->random));
 }
 EXPORT_SYMBOL_GPL(add_disk_randomness);
 
+// todo: not sure how to integrate this one.
 void rand_initialize_disk(struct gendisk *disk)
 {
 	struct timer_rand_state *state;
@@ -1129,33 +1055,15 @@ void rand_initialize_disk(struct gendisk *disk)
 
 /*
  * Interface for in-kernel drivers of true hardware RNGs.
- * Those devices may produce endless random bits and will be throttled
- * when our pool is full.
+ * No throttle and no locks; add it asap - people are waiting.
 */
 void add_hwgenerator_randomness(const void *buffer, size_t count,
 				size_t entropy)
 {
-	if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) {
-		size_t ret = crng_pre_init_inject(buffer, count, true);
-		mix_pool_bytes(buffer, ret);
-		count -= ret;
-		buffer += ret;
-		if (!count || crng_init == 0)
-			return;
-	}
-
-	/*
-	 * Throttle writing if we're above the trickle threshold.
-	 * We'll be woken up again once below POOL_MIN_BITS, when
-	 * the calling thread is about to terminate, or once
-	 * CRNG_RESEED_INTERVAL has elapsed.
-	 */
-	wait_event_interruptible_timeout(random_write_wait,
-			!system_wq || kthread_should_stop() ||
-			input_pool.entropy_count < POOL_MIN_BITS,
-			CRNG_RESEED_INTERVAL);
-	mix_pool_bytes(buffer, count);
-	credit_entropy_bits(entropy);
+	// Make sure this caller has a unique entry point into the jump table.
+	uint64_t gatekey = __make_gatekey(buffer);
+	// Add this buffer to the jump table ('count' bytes are available to read).
+	_add_unique(runtime_entropy, POOL_SIZE, gatekey, (uint8_t *)buffer, count, count);
 }
 EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
@@ -1184,12 +1092,10 @@ static BLOCKING_NOTIFIER_HEAD(vmfork_chain);
  */
 void add_vmfork_randomness(const void *unique_vm_id, size_t size)
 {
-	add_device_randomness(unique_vm_id, size);
-	if (crng_ready()) {
-		crng_reseed(true);
-		pr_notice("crng reseeded due to virtual machine fork\n");
-	}
-	blocking_notifier_call_chain(&vmfork_chain, 0, NULL);
+	// Make sure this caller has a unique entry point into the jump table.
+	uint64_t gatekey = __make_gatekey(unique_vm_id);
+	// Add the VM id to the jump table.
+	_add_unique(runtime_entropy, POOL_SIZE, gatekey, (uint8_t *)unique_vm_id, size, size);
 }
 #if IS_MODULE(CONFIG_VMGENID)
 EXPORT_SYMBOL_GPL(add_vmfork_randomness);
@@ -1208,51 +1114,6 @@ int unregister_random_vmfork_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier);
 #endif
 
-struct fast_pool {
-	struct work_struct mix;
-	unsigned long pool[4];
-	unsigned long last;
-	unsigned int count;
-	u16 reg_idx;
-};
-
-static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = {
-#ifdef CONFIG_64BIT
-	/* SipHash constants */
-	.pool = { 0x736f6d6570736575UL, 0x646f72616e646f6dUL,
-		  0x6c7967656e657261UL, 0x7465646279746573UL }
-#else
-	/* HalfSipHash constants */
-	.pool = { 0, 0, 0x6c796765U, 0x74656462U }
-#endif
-};
-
-/*
- * This is [Half]SipHash-1-x, starting from an empty key. Because
- * the key is fixed, it assumes that its inputs are non-malicious,
- * and therefore this has no security on its own. s represents the
- * 128 or 256-bit SipHash state, while v represents a 128-bit input.
- */
-static void fast_mix(unsigned long s[4], const unsigned long *v)
-{
-	size_t i;
-
-	for (i = 0; i < 16 / sizeof(long); ++i) {
-		s[3] ^= v[i];
-#ifdef CONFIG_64BIT
-		s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32);
-		s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2];
-		s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0];
-		s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32);
-#else
-		s[0] += s[1]; s[1] = rol32(s[1], 5); s[1] ^= s[0]; s[0] = rol32(s[0], 16);
-		s[2] += s[3]; s[3] = rol32(s[3], 8); s[3] ^= s[2];
-		s[0] += s[3]; s[3] = rol32(s[3], 7); s[3] ^= s[0];
-		s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16);
-#endif
-		s[0] ^= v[i];
-	}
-}
 
 #ifdef CONFIG_SMP
 /*
@@ -1261,175 +1122,12 @@ static void fast_mix(unsigned long s[4], const unsigned long *v)
 */
 int random_online_cpu(unsigned int cpu)
 {
-	/*
-	 * During CPU shutdown and before CPU onlining, add_interrupt_
-	 * randomness() may schedule mix_interrupt_randomness(), and
-	 * set the MIX_INFLIGHT flag. However, because the worker can
-	 * be scheduled on a different CPU during this period, that
-	 * flag will never be cleared. For that reason, we zero out
-	 * the flag here, which runs just after workqueues are onlined
-	 * for the CPU again.
This also has the effect of setting the - * irq randomness count to zero so that new accumulated irqs - * are fresh. - */ - per_cpu_ptr(&irq_randomness, cpu)->count = 0; + // todo I need help here... + // per_cpu_ptr(&irq_randomness, cpu)->count = 0; return 0; } #endif -static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs) -{ - unsigned long *ptr = (unsigned long *)regs; - unsigned int idx; - - if (regs == NULL) - return 0; - idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long)) - idx = 0; - ptr += idx++; - WRITE_ONCE(f->reg_idx, idx); - return *ptr; -} - -static void mix_interrupt_randomness(struct work_struct *work) -{ - struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); - /* - * The size of the copied stack pool is explicitly 16 bytes so that we - * tax mix_pool_byte()'s compression function the same amount on all - * platforms. This means on 64-bit we copy half the pool into this, - * while on 32-bit we copy all of it. The entropy is supposed to be - * sufficiently dispersed between bits that in the sponge-like - * half case, on average we don't wind up "losing" some. - */ - u8 pool[16]; - - /* Check to see if we're running on the wrong CPU due to hotplug. */ - local_irq_disable(); - if (fast_pool != this_cpu_ptr(&irq_randomness)) { - local_irq_enable(); - return; - } - - /* - * Copy the pool to the stack so that the mixer always has a - * consistent view, before we reenable irqs again. - */ - memcpy(pool, fast_pool->pool, sizeof(pool)); - fast_pool->count = 0; - fast_pool->last = jiffies; - local_irq_enable(); - - if (unlikely(crng_init == 0)) { - crng_pre_init_inject(pool, sizeof(pool), true); - mix_pool_bytes(pool, sizeof(pool)); - } else { - mix_pool_bytes(pool, sizeof(pool)); - credit_entropy_bits(1); - } - - memzero_explicit(pool, sizeof(pool)); -} - -void add_interrupt_randomness(int irq) -{ - enum { MIX_INFLIGHT = 1U << 31 }; - cycles_t cycles = random_get_entropy(); - unsigned long now = jiffies; - struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); - struct pt_regs *regs = get_irq_regs(); - unsigned int new_count; - union { - u32 u32[4]; - u64 u64[2]; - unsigned long longs[16 / sizeof(long)]; - } irq_data; - - if (cycles == 0) - cycles = get_reg(fast_pool, regs); - - if (sizeof(cycles) == 8) - irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq; - else { - irq_data.u32[0] = cycles ^ irq; - irq_data.u32[1] = now; - } - - if (sizeof(unsigned long) == 8) - irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; - else { - irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_; - irq_data.u32[3] = get_reg(fast_pool, regs); - } - - fast_mix(fast_pool->pool, irq_data.longs); - new_count = ++fast_pool->count; - - if (new_count & MIX_INFLIGHT) - return; - - if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) || - unlikely(crng_init == 0))) - return; - - if (unlikely(!fast_pool->mix.func)) - INIT_WORK(&fast_pool->mix, mix_interrupt_randomness); - fast_pool->count |= MIX_INFLIGHT; - queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix); -} -EXPORT_SYMBOL_GPL(add_interrupt_randomness); - -/* - * Each time the timer fires, we expect that we got an unpredictable - * jump in the cycle counter. Even if the timer is running on another - * CPU, the timer activity will be touching the stack of the CPU that is - * generating entropy.. 
-	 *
-	 * Note that we don't re-arm the timer in the timer itself - we are
-	 * happy to be scheduled away, since that just makes the load more
-	 * complex, but we do not want the timer to keep ticking unless the
-	 * entropy loop is running.
-	 *
-	 * So the re-arming always happens in the entropy loop itself.
-	 */
-static void entropy_timer(struct timer_list *t)
-{
-	credit_entropy_bits(1);
-}
-
-/*
- * If we have an actual cycle counter, see if we can
- * generate enough entropy with timing noise
- */
-static void try_to_generate_entropy(void)
-{
-	struct {
-		cycles_t cycles;
-		struct timer_list timer;
-	} stack;
-
-	stack.cycles = random_get_entropy();
-
-	/* Slow counter - or none. Don't even bother */
-	if (stack.cycles == random_get_entropy())
-		return;
-
-	timer_setup_on_stack(&stack.timer, entropy_timer, 0);
-	while (!crng_ready() && !signal_pending(current)) {
-		if (!timer_pending(&stack.timer))
-			mod_timer(&stack.timer, jiffies + 1);
-		mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
-		schedule();
-		stack.cycles = random_get_entropy();
-	}
-
-	del_timer_sync(&stack.timer);
-	destroy_timer_on_stack(&stack.timer);
-	mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
-}
-
-
 /**********************************************************************
  *
  * Userspace reader/writer interfaces.
@@ -1461,29 +1159,8 @@ static void try_to_generate_entropy(void)
 SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
 		flags)
 {
-	if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
-		return -EINVAL;
-
-	/*
-	 * Requesting insecure and blocking randomness at the same time makes
-	 * no sense.
-	 */
-	if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
-		return -EINVAL;
-
-	if (count > INT_MAX)
-		count = INT_MAX;
-
-	if (!(flags & GRND_INSECURE) && !crng_ready()) {
-		int ret;
-
-		if (flags & GRND_NONBLOCK)
-			return -EAGAIN;
-		ret = wait_for_random_bytes();
-		if (unlikely(ret))
-			return ret;
-	}
-	return get_random_bytes_user(buf, count);
+	// Born ready.
+	return extract_crng_user((uint8_t *)buf, count);
 }
 
 static __poll_t random_poll(struct file *file, poll_table *wait)
@@ -1502,25 +1179,14 @@ static __poll_t random_poll(struct file *file, poll_table *wait)
 
 static int write_pool(const char __user *ubuf, size_t count)
 {
-	size_t len;
-	int ret = 0;
-	u8 block[BLAKE2S_BLOCK_SIZE];
-
-	while (count) {
-		len = min(count, sizeof(block));
-		if (copy_from_user(block, ubuf, len)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		count -= len;
-		ubuf += len;
-		mix_pool_bytes(block, len);
-		cond_resched();
-	}
-
-out:
-	memzero_explicit(block, sizeof(block));
-	return ret;
+	// A unique gatekey made using the time and address of the caller.
+	// No two invocations of write_pool() could ever produce the same gatekey.
+	uint64_t gatekey = __make_gatekey(ubuf);
+	u8 block[64];
+	size_t len;
+	// ubuf is a userspace pointer, so it must be bounced through
+	// copy_from_user() before it can be dropped into the pool.
+	while (count) {
+		len = min(count, sizeof(block));
+		if (copy_from_user(block, ubuf, len))
+			return -EFAULT;
+		_add_unique(runtime_entropy, POOL_SIZE, gatekey, block, len, len);
+		ubuf += len;
+		count -= len;
+	}
+	// todo: there needs to be a broader cleanup that I'm not ready for:
+	// if _add_unique() fails then we have bigger problems on our hands.
+	return 0;
 }
 
 static ssize_t random_write(struct file *file, const char __user *buffer,
@@ -1535,32 +1201,6 @@ static ssize_t random_write(struct file *file, const char __user *buffer,
 	return (ssize_t)count;
 }
 
-static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
-			    loff_t *ppos)
-{
-	static int maxwarn = 10;
-
-	if (!crng_ready() && maxwarn > 0) {
-		maxwarn--;
-		if (__ratelimit(&urandom_warning))
-			pr_notice("%s: uninitialized urandom read (%zd bytes read)\n",
-				  current->comm, nbytes);
-	}
-
-	return get_random_bytes_user(buf, nbytes);
-}
-
-static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
-			   loff_t *ppos)
-{
-	int ret;
-
-	ret = wait_for_random_bytes();
-	if (ret != 0)
-		return ret;
-	return get_random_bytes_user(buf, nbytes);
-}
-
 static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 {
 	int size, ent_count;
@@ -1580,7 +1220,6 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EFAULT;
 		if (ent_count < 0)
 			return -EINVAL;
-		credit_entropy_bits(ent_count);
 		return 0;
 	case RNDADDENTROPY:
 		if (!capable(CAP_SYS_ADMIN))
@@ -1594,7 +1233,6 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		retval = write_pool((const char __user *)p, size);
 		if (retval < 0)
 			return retval;
-		credit_entropy_bits(ent_count);
 		return 0;
 	case RNDZAPENTCNT:
 	case RNDCLEARPOOL:
@@ -1614,7 +1252,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EPERM;
 		if (!crng_ready())
 			return -ENODATA;
-		crng_reseed(false);
+		crng_reseed(runtime_entropy, sizeof(runtime_entropy));
 		return 0;
 	default:
 		return -EINVAL;
@@ -1685,6 +1323,8 @@ static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS;
 static int sysctl_poolsize = POOL_BITS;
 static u8 sysctl_bootid[UUID_SIZE];
 
+
+// todo: not sure how to fix proc_do_uuid(); this shouldn't need a spin lock with the new key-scheduling primitive.
 /*
  * This function is used to return both the bootid UUID, and random
  * UUID. The difference is in whether table->data is NULL; if it is,