Re: [PATCH v2] lib/crypto: blake2s: avoid indirect calls to compression function for Clang CFI

From: Nick Desaulniers
Date: Mon Jan 24 2022 - 16:27:31 EST


On Mon, Jan 24, 2022 at 11:29 AM Jason A. Donenfeld <Jason@xxxxxxxxx> wrote:
>
> blake2s_compress_generic is weakly aliased by blake2s_generic. The
> current harness for function selection uses a function pointer, which is
> ordinarily inlined and resolved at compile time. But when Clang's CFI is
> enabled, CFI still triggers when making an indirect call via a weak
> symbol. This seems like a bug in Clang's CFI, as though it's bucketing
> weak symbols and strong symbols differently. It also only seems to
> trigger when "full LTO" mode is used, rather than "thin LTO".
>
> [ 0.000000][ T0] Kernel panic - not syncing: CFI failure (target: blake2s_compress_generic+0x0/0x1444)
> [ 0.000000][ T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-mainline-06981-g076c855b846e #1
> [ 0.000000][ T0] Hardware name: MT6873 (DT)
> [ 0.000000][ T0] Call trace:
> [ 0.000000][ T0] dump_backtrace+0xfc/0x1dc
> [ 0.000000][ T0] dump_stack_lvl+0xa8/0x11c
> [ 0.000000][ T0] panic+0x194/0x464
> [ 0.000000][ T0] __cfi_check_fail+0x54/0x58
> [ 0.000000][ T0] __cfi_slowpath_diag+0x354/0x4b0
> [ 0.000000][ T0] blake2s_update+0x14c/0x178
> [ 0.000000][ T0] _extract_entropy+0xf4/0x29c
> [ 0.000000][ T0] crng_initialize_primary+0x24/0x94
> [ 0.000000][ T0] rand_initialize+0x2c/0x6c
> [ 0.000000][ T0] start_kernel+0x2f8/0x65c
> [ 0.000000][ T0] __primary_switched+0xc4/0x7be4
> [ 0.000000][ T0] Rebooting in 5 seconds..
>
> Nonetheless, the function pointer method isn't so terrific anyway, so
> this patch replaces it with a simple boolean, which also gets inlined
> away. This successfully works around the Clang bug.

Acked-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>

Thanks for the report. Once we have a fix in hand for LLVM, we can
revisit removing this and raising the required LLVM version for CFI.

>
> In general, I'm not too keen on all of the indirection involved here; it
> clearly does more harm than good. Hopefully the whole thing can get
> cleaned up down the road when lib/crypto is overhauled more
> comprehensively. But for now, we go with a simple bandaid.
>
> Fixes: 6048fdcc5f26 ("lib/crypto: blake2s: include as built-in")
> Reported-by: Miles Chen <miles.chen@xxxxxxxxxxxx>
> Tested-by: Miles Chen <miles.chen@xxxxxxxxxxxx>
> Tested-by: Nathan Chancellor <nathan@xxxxxxxxxx>
> Link: https://github.com/ClangBuiltLinux/linux/issues/1567
> Cc: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
> Cc: Sami Tolvanen <samitolvanen@xxxxxxxxxx>
> Cc: Ard Biesheuvel <ardb@xxxxxxxxxx>
> Signed-off-by: Jason A. Donenfeld <Jason@xxxxxxxxx>
> ---
> Changes v1->v2:
> - Wrapped columns at 80 for Eric.
>
> arch/arm/crypto/blake2s-shash.c | 4 ++--
> arch/x86/crypto/blake2s-shash.c | 4 ++--
> crypto/blake2s_generic.c | 4 ++--
> include/crypto/internal/blake2s.h | 40 +++++++++++++++++++------------
> lib/crypto/blake2s.c | 4 ++--
> 5 files changed, 33 insertions(+), 23 deletions(-)
>
> diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c
> index 17c1c3bfe2f5..763c73beea2d 100644
> --- a/arch/arm/crypto/blake2s-shash.c
> +++ b/arch/arm/crypto/blake2s-shash.c
> @@ -13,12 +13,12 @@
> static int crypto_blake2s_update_arm(struct shash_desc *desc,
> const u8 *in, unsigned int inlen)
> {
> - return crypto_blake2s_update(desc, in, inlen, blake2s_compress);
> + return crypto_blake2s_update(desc, in, inlen, false);
> }
>
> static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
> {
> - return crypto_blake2s_final(desc, out, blake2s_compress);
> + return crypto_blake2s_final(desc, out, false);
> }
>
> #define BLAKE2S_ALG(name, driver_name, digest_size) \
> diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c
> index f9e2fecdb761..59ae28abe35c 100644
> --- a/arch/x86/crypto/blake2s-shash.c
> +++ b/arch/x86/crypto/blake2s-shash.c
> @@ -18,12 +18,12 @@
> static int crypto_blake2s_update_x86(struct shash_desc *desc,
> const u8 *in, unsigned int inlen)
> {
> - return crypto_blake2s_update(desc, in, inlen, blake2s_compress);
> + return crypto_blake2s_update(desc, in, inlen, false);
> }
>
> static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
> {
> - return crypto_blake2s_final(desc, out, blake2s_compress);
> + return crypto_blake2s_final(desc, out, false);
> }
>
> #define BLAKE2S_ALG(name, driver_name, digest_size) \
> diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
> index 72fe480f9bd6..5f96a21f8788 100644
> --- a/crypto/blake2s_generic.c
> +++ b/crypto/blake2s_generic.c
> @@ -15,12 +15,12 @@
> static int crypto_blake2s_update_generic(struct shash_desc *desc,
> const u8 *in, unsigned int inlen)
> {
> - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_generic);
> + return crypto_blake2s_update(desc, in, inlen, true);
> }
>
> static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
> {
> - return crypto_blake2s_final(desc, out, blake2s_compress_generic);
> + return crypto_blake2s_final(desc, out, true);
> }
>
> #define BLAKE2S_ALG(name, driver_name, digest_size) \
> diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
> index d39cfa0d333e..52363eee2b20 100644
> --- a/include/crypto/internal/blake2s.h
> +++ b/include/crypto/internal/blake2s.h
> @@ -24,14 +24,11 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state)
> state->f[0] = -1;
> }
>
> -typedef void (*blake2s_compress_t)(struct blake2s_state *state,
> - const u8 *block, size_t nblocks, u32 inc);
> -
> /* Helper functions for BLAKE2s shared by the library and shash APIs */
>
> -static inline void __blake2s_update(struct blake2s_state *state,
> - const u8 *in, size_t inlen,
> - blake2s_compress_t compress)
> +static __always_inline void
> +__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen,
> + bool force_generic)
> {
> const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
>
> @@ -39,7 +36,12 @@ static inline void __blake2s_update(struct blake2s_state *state,
> return;
> if (inlen > fill) {
> memcpy(state->buf + state->buflen, in, fill);
> - (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
> + if (force_generic)
> + blake2s_compress_generic(state, state->buf, 1,
> + BLAKE2S_BLOCK_SIZE);
> + else
> + blake2s_compress(state, state->buf, 1,
> + BLAKE2S_BLOCK_SIZE);
> state->buflen = 0;
> in += fill;
> inlen -= fill;
> @@ -47,7 +49,12 @@ static inline void __blake2s_update(struct blake2s_state *state,
> if (inlen > BLAKE2S_BLOCK_SIZE) {
> const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
> /* Hash one less (full) block than strictly possible */
> - (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
> + if (force_generic)
> + blake2s_compress_generic(state, in, nblocks - 1,
> + BLAKE2S_BLOCK_SIZE);
> + else
> + blake2s_compress(state, in, nblocks - 1,
> + BLAKE2S_BLOCK_SIZE);
> in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
> inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
> }
> @@ -55,13 +62,16 @@ static inline void __blake2s_update(struct blake2s_state *state,
> state->buflen += inlen;
> }
>
> -static inline void __blake2s_final(struct blake2s_state *state, u8 *out,
> - blake2s_compress_t compress)
> +static __always_inline void
> +__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic)
> {
> blake2s_set_lastblock(state);
> memset(state->buf + state->buflen, 0,
> BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
> - (*compress)(state, state->buf, 1, state->buflen);
> + if (force_generic)
> + blake2s_compress_generic(state, state->buf, 1, state->buflen);
> + else
> + blake2s_compress(state, state->buf, 1, state->buflen);
> cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
> memcpy(out, state->h, state->outlen);
> }
> @@ -99,20 +109,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc)
>
> static inline int crypto_blake2s_update(struct shash_desc *desc,
> const u8 *in, unsigned int inlen,
> - blake2s_compress_t compress)
> + bool force_generic)
> {
> struct blake2s_state *state = shash_desc_ctx(desc);
>
> - __blake2s_update(state, in, inlen, compress);
> + __blake2s_update(state, in, inlen, force_generic);
> return 0;
> }
>
> static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out,
> - blake2s_compress_t compress)
> + bool force_generic)
> {
> struct blake2s_state *state = shash_desc_ctx(desc);
>
> - __blake2s_final(state, out, compress);
> + __blake2s_final(state, out, force_generic);
> return 0;
> }
>
> diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
> index 9364f79937b8..c71c09621c09 100644
> --- a/lib/crypto/blake2s.c
> +++ b/lib/crypto/blake2s.c
> @@ -18,14 +18,14 @@
>
> void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
> {
> - __blake2s_update(state, in, inlen, blake2s_compress);
> + __blake2s_update(state, in, inlen, false);
> }
> EXPORT_SYMBOL(blake2s_update);
>
> void blake2s_final(struct blake2s_state *state, u8 *out)
> {
> WARN_ON(IS_ENABLED(DEBUG) && !out);
> - __blake2s_final(state, out, blake2s_compress);
> + __blake2s_final(state, out, false);
> memzero_explicit(state, sizeof(*state));
> }
> EXPORT_SYMBOL(blake2s_final);
> --
> 2.34.1
>


--
Thanks,
~Nick Desaulniers