Re: [PATCH v2] mm: kfence: Improve the performance of __kfence_alloc() and __kfence_free()

From: Alexander Potapenko
Date: Tue Apr 04 2023 - 05:26:00 EST


> +static inline void check_canary(const struct kfence_metadata *meta)
> +{
> + const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
> + unsigned long addr = pageaddr;
>
> /*
> - * We'll iterate over each canary byte per-side until fn() returns
> - * false. However, we'll still iterate over the canary bytes to the
> + * We'll iterate over each canary byte per-side until a corrupted byte
> + * is found. However, we'll still iterate over the canary bytes to the
> * right of the object even if there was an error in the canary bytes to
> * the left of the object. Specifically, if check_canary_byte()
> * generates an error, showing both sides might give more clues as to
> @@ -339,16 +348,35 @@ static __always_inline void for_each_canary(const struct kfence_metadata *meta,
> */
>
> /* Apply to left of object. */
> - for (addr = pageaddr; addr < meta->addr; addr++) {
> - if (!fn((u8 *)addr))
> + for (; meta->addr - addr >= sizeof(u64); addr += sizeof(u64)) {
> + if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64))
> break;
> }
I am confused. Right now this loop either runs from pageaddr up to the
last whole 8-byte word below meta->addr if there's no corruption, or
breaks at the first corrupted 8-byte word.
Regardless of that, the following loop then applies check_canary_byte()
to every byte from wherever this loop stopped up to meta->addr.
Shouldn't the two be nested, like in the case of the canary bytes to
the right of the object?
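
Something like the below is roughly what I had in mind - purely an
untested sketch to illustrate the question, with an extra
"addr = meta->addr" only so that the trailing byte-wise loop does not
re-check a byte that was already reported:

	/* Apply to left of object. */
	for (; meta->addr - addr >= sizeof(u64); addr += sizeof(u64)) {
		if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64)) {
			/* Corrupted word: recheck it byte by byte so it gets reported. */
			for (; addr < meta->addr; addr++) {
				if (!check_canary_byte((u8 *)addr))
					break;
			}
			/* Done with the left-hand canary either way. */
			addr = meta->addr;
			break;
		}
	}

	/* Trailing bytes of a partial word, if meta->addr is not 8-byte aligned. */
	for (; addr < meta->addr; addr++) {
		if (unlikely(!check_canary_byte((u8 *)addr)))
			break;
	}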


>
> - /* Apply to right of object. */
> - for (addr = meta->addr + meta->size; addr < pageaddr + PAGE_SIZE; addr++) {
> - if (!fn((u8 *)addr))
> + /*
> + * If the canary is corrupted in a certain 64 bytes, or the canary
> + * memory cannot be completely covered by multiple consecutive 64 bytes,
> + * it needs to be checked one by one.
> + */
> + for (; addr < meta->addr; addr++) {
> + if (unlikely(!check_canary_byte((u8 *)addr)))
> break;
> }
> +
> + /* Apply to right of object. */
> + for (addr = meta->addr + meta->size; addr % sizeof(u64) != 0; addr++) {
> + if (unlikely(!check_canary_byte((u8 *)addr)))
> + return;
> + }
> + for (; addr - pageaddr < PAGE_SIZE; addr += sizeof(u64)) {
> + if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64)) {
> +
> + for (; addr - pageaddr < PAGE_SIZE; addr++) {
> + if (!check_canary_byte((u8 *)addr))
> + return;
> + }
> + }
> + }
> }
>
> static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp,
> @@ -434,7 +462,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
> #endif
>
> /* Memory initialization. */
> - for_each_canary(meta, set_canary_byte);
> + set_canary(meta);
>
> /*
> * We check slab_want_init_on_alloc() ourselves, rather than letting
> @@ -495,7 +523,7 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z
> alloc_covered_add(meta->alloc_stack_hash, -1);
>
> /* Check canary bytes for memory corruption. */
> - for_each_canary(meta, check_canary_byte);
> + check_canary(meta);
>
> /*
> * Clear memory if init-on-free is set. While we protect the page, the
> @@ -751,7 +779,7 @@ static void kfence_check_all_canary(void)
> struct kfence_metadata *meta = &kfence_metadata[i];
>
> if (meta->state == KFENCE_OBJECT_ALLOCATED)
> - for_each_canary(meta, check_canary_byte);
> + check_canary(meta);
> }
> }
>
> diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
> index 600f2e2431d6..2aafc46a4aaf 100644
> --- a/mm/kfence/kfence.h
> +++ b/mm/kfence/kfence.h
> @@ -21,7 +21,15 @@
> * lower 3 bits of the address, to detect memory corruptions with higher
> * probability, where similar constants are used.
> */
> -#define KFENCE_CANARY_PATTERN(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7))
> +#define KFENCE_CANARY_PATTERN_U8(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7))
> +
> +/*
> + * Define a continuous 8-byte canary starting from a multiple of 8. The canary
> + * of each byte is only related to the lowest three bits of its address, so the
> + * canary of every 8 bytes is the same. 64-bit memory can be filled and checked
> + * at a time instead of byte by byte to improve performance.
> + */
> +#define KFENCE_CANARY_PATTERN_U64 ((u64)0xaaaaaaaaaaaaaaaa ^ (u64)(0x0706050403020100))
>
> /* Maximum stack depth for reports. */
> #define KFENCE_STACK_DEPTH 64
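
(Unrelated to the question above: for anyone who wants to convince
themselves that the U64 pattern is just the per-byte pattern repeated,
a throwaway user-space check along the lines below works; the macros
are copied locally with stdint types, and reading the bytes back as a
u64 this way assumes a little-endian layout, e.g. x86_64.)

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Local copies of the kfence.h definitions, with user-space types. */
	#define CANARY_PATTERN_U8(addr) \
		((uint8_t)0xaa ^ (uint8_t)((unsigned long)(addr) & 0x7))
	#define CANARY_PATTERN_U64 \
		((uint64_t)0xaaaaaaaaaaaaaaaa ^ (uint64_t)0x0706050403020100)

	int main(void)
	{
		/* 8-byte aligned buffer, filled byte by byte the way set_canary_byte() used to. */
		_Alignas(8) uint8_t canary[8];
		uint64_t word;
		int i;

		for (i = 0; i < 8; i++)
			canary[i] = CANARY_PATTERN_U8(&canary[i]);

		/* On a little-endian machine the 8 bytes read back as the U64 constant. */
		memcpy(&word, canary, sizeof(word));
		assert(word == CANARY_PATTERN_U64);
		printf("ok: 0x%016llx\n", (unsigned long long)word);
		return 0;
	}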
> diff --git a/mm/kfence/report.c b/mm/kfence/report.c
> index 60205f1257ef..197430a5be4a 100644
> --- a/mm/kfence/report.c
> +++ b/mm/kfence/report.c
> @@ -168,7 +168,7 @@ static void print_diff_canary(unsigned long address, size_t bytes_to_show,
>
> pr_cont("[");
> for (cur = (const u8 *)address; cur < end; cur++) {
> - if (*cur == KFENCE_CANARY_PATTERN(cur))
> + if (*cur == KFENCE_CANARY_PATTERN_U8(cur))
> pr_cont(" .");
> else if (no_hash_pointers)
> pr_cont(" 0x%02x", *cur);
> --
> 2.20.1
>



--
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Geschäftsführer: Paul Manicle, Liana Sebastian
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg