Re: [PATCH v12] x86, mce: Add memcpy_trap()

From: Ingo Molnar
Date: Fri Feb 19 2016 - 04:10:36 EST



* Luck, Tony <tony.luck@xxxxxxxxx> wrote:

> Make use of the EXTABLE_FAULT exception table entries. This routine
> returns a structure to indicate the result of the copy:
>
> struct mcsafe_ret {
> u64 trap_nr;
> u64 bytes_left;
> };
>
> If the copy is successful, then both 'trap_nr' and 'bytes_left' are zero.
>
> If we faulted during the copy, then 'trap_nr' will say which type
> of trap (X86_TRAP_PF or X86_TRAP_MC) and 'bytes_left' says how many
> bytes were not copied.
>
> Note that this is probably the first of several copy functions.
> We can make new ones for non-temporal cache handling etc.
>
> Reviewed-by: Borislav Petkov <bp@xxxxxxx>
> Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
> ---
>
> V12 (part3 only - parts 1,2,4 are in tip tree)
>
> Ingo: More meaningful names for fields of return structure
> PeterZ: Better name for copy function: memcpy_trap()
> Ingo: Don't use numeric labels in new asm code
> Ingo: Consistent capitalization in comments.
> Ingo: Periods between sentences in comments.
>
> Not addressed: Linus' comment that perhaps we could try/catch
> syntactic sugar instead of returning multiple values in a structure.
>
> arch/x86/include/asm/string_64.h | 26 +++++++
> arch/x86/kernel/x8664_ksyms_64.c | 2 +
> arch/x86/lib/memcpy_64.S | 158 +++++++++++++++++++++++++++++++++++++++
> 3 files changed, 186 insertions(+)
>
> diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
> index ff8b9a17dc4b..65e5793b7590 100644
> --- a/arch/x86/include/asm/string_64.h
> +++ b/arch/x86/include/asm/string_64.h
> @@ -78,6 +78,32 @@ int strcmp(const char *cs, const char *ct);
> #define memset(s, c, n) __memset(s, c, n)
> #endif
>
> +/**
> + * struct memcpy_trap_ret - return value from memcpy_trap()
> + *
> + * @trap_nr x86 trap number if the copy failed
> + * @bytes_left zero for successful copy else number of bytes not copied
> + */
> +struct memcpy_trap_ret {
> + u64 trap_nr;
> + u64 bytes_left;
> +};
> +
> +/**
> + * memcpy_trap - copy memory with indication if a trap interrupted the copy
> + *
> + * @dst: destination address
> + * @src: source address
> + * @cnt: number of bytes to copy
> + *
> + * Low level memory copy function that catches traps and indicates whether
> + * the copy succeeded and if not, why it failed.
> + *
> + * Return is struct memcpy_trap_ret which provides both the number of bytes
> + * not copied and the reason for the failure.
> + */
> +struct memcpy_trap_ret memcpy_trap(void *dst, const void __user *src, size_t cnt);
> +
> #endif /* __KERNEL__ */
>
> #endif /* _ASM_X86_STRING_64_H */
> diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
> index a0695be19864..40866e2cbcc4 100644
> --- a/arch/x86/kernel/x8664_ksyms_64.c
> +++ b/arch/x86/kernel/x8664_ksyms_64.c
> @@ -37,6 +37,8 @@ EXPORT_SYMBOL(__copy_user_nocache);
> EXPORT_SYMBOL(_copy_from_user);
> EXPORT_SYMBOL(_copy_to_user);
>
> +EXPORT_SYMBOL_GPL(memcpy_trap);
> +
> EXPORT_SYMBOL(copy_page);
> EXPORT_SYMBOL(clear_page);
>
> diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
> index 16698bba87de..aecdfc41c114 100644
> --- a/arch/x86/lib/memcpy_64.S
> +++ b/arch/x86/lib/memcpy_64.S
> @@ -177,3 +177,161 @@ ENTRY(memcpy_orig)
> .Lend:
> retq
> ENDPROC(memcpy_orig)
> +
> +#ifndef CONFIG_UML
> +/*
> + * memcpy_trap - memory copy with machine check exception handling
> + * Note that we only catch machine checks when reading the source addresses.
> + * Writes to target are posted and don't generate machine checks.
> + */
> +ENTRY(memcpy_trap)
> + cmpl $8,%edx
> + /* Less than 8 bytes? Go to byte copy loop */
> + jb .L_no_whole_words
> +
> + /* Check for bad alignment of source */
> + testl $7,%esi
> + /* Already aligned */
> + jz .L_8byte_aligned
> +
> + /* Copy one byte at a time until source is 8-byte aligned */
> + movl %esi,%ecx
> + andl $7,%ecx
> + subl $8,%ecx
> + negl %ecx
> + subl %ecx,%edx
> +.L_copy_leading_bytes:
> + movb (%rsi),%al
> + movb %al,(%rdi)
> + incq %rsi
> + incq %rdi
> + decl %ecx
> + jnz .L_copy_leading_bytes
> +
> +.L_8byte_aligned:
> + /* Figure out how many whole cache lines (64-bytes) to copy */
> + movl %edx,%ecx
> + andl $63,%edx
> + shrl $6,%ecx
> + jz .L_no_whole_cache_lines
> +
> + /* Loop copying whole cache lines */
> +.L_cache_w0: movq (%rsi),%r8
> +.L_cache_w1: movq 1*8(%rsi),%r9
> +.L_cache_w2: movq 2*8(%rsi),%r10
> +.L_cache_w3: movq 3*8(%rsi),%r11
> + movq %r8,(%rdi)
> + movq %r9,1*8(%rdi)
> + movq %r10,2*8(%rdi)
> + movq %r11,3*8(%rdi)
> +.L_cache_w4: movq 4*8(%rsi),%r8
> +.L_cache_w5: movq 5*8(%rsi),%r9
> +.L_cache_w6: movq 6*8(%rsi),%r10
> +.L_cache_w7: movq 7*8(%rsi),%r11
> + movq %r8,4*8(%rdi)
> + movq %r9,5*8(%rdi)
> + movq %r10,6*8(%rdi)
> + movq %r11,7*8(%rdi)
> + leaq 64(%rsi),%rsi
> + leaq 64(%rdi),%rdi
> + decl %ecx
> + jnz .L_cache_w0
> +
> + /* Are there any trailing 8-byte words? */
> +.L_no_whole_cache_lines:
> + movl %edx,%ecx
> + andl $7,%edx
> + shrl $3,%ecx
> + jz .L_no_whole_words
> +
> + /* Copy trailing words */
> +.L_copy_trailing_words:
> + movq (%rsi),%r8
> + mov %r8,(%rdi)
> + leaq 8(%rsi),%rsi
> + leaq 8(%rdi),%rdi
> + decl %ecx
> + jnz .L_copy_trailing_words
> +
> + /* Any trailing bytes? */
> +.L_no_whole_words:
> + andl %edx,%edx
> + jz .L_done_memcpy_trap
> +
> + /* Copy trailing bytes */
> + movl %edx,%ecx
> +.L_copy_trailing_bytes:
> + movb (%rsi),%al
> + movb %al,(%rdi)
> + incq %rsi
> + incq %rdi
> + decl %ecx
> + jnz .L_copy_trailing_bytes
> +
> + /* Copy successful. Return .remain = 0, .trapnr = 0 */
> +.L_done_memcpy_trap:
> + xorq %rax, %rax
> + xorq %rdx, %rdx
> + ret
> +
> + .section .fixup,"ax"
> + /*
> + * The machine check handler loaded %rax with trap number.
> + * We just need to make sure %edx has the number of
> + * bytes remaining.
> + */
> +.L_fix_leading_bytes:
> + add %ecx,%edx
> + ret
> +.L_fix_cache_w0:
> + shl $6,%ecx
> + add %ecx,%edx
> + ret
> +.L_fix_cache_w1:
> + shl $6,%ecx
> + lea -8(%ecx,%edx),%edx
> + ret
> +.L_fix_cache_w2:
> + shl $6,%ecx
> + lea -16(%ecx,%edx),%edx
> + ret
> +.L_fix_cache_w3:
> + shl $6,%ecx
> + lea -24(%ecx,%edx),%edx
> + ret
> +.L_fix_cache_w4:
> + shl $6,%ecx
> + lea -32(%ecx,%edx),%edx
> + ret
> +.L_fix_cache_w5:
> + shl $6,%ecx
> + lea -40(%ecx,%edx),%edx
> + ret
> +.L_fix_cache_w6:
> + shl $6,%ecx
> + lea -48(%ecx,%edx),%edx
> + ret
> +.L_fix_cache_w7:
> + shl $6,%ecx
> + lea -56(%ecx,%edx),%edx
> + ret
> +.L_fix_trailing_words:
> + lea (%rdx,%rcx,8),%rdx
> + ret
> +.L_fix_trailing_bytes:
> + mov %ecx,%edx
> + ret
> + .previous
> +
> + _ASM_EXTABLE_FAULT(.L_copy_leading_bytes,.L_fix_leading_bytes)
> + _ASM_EXTABLE_FAULT(.L_cache_w0,.L_fix_cache_w0)
> + _ASM_EXTABLE_FAULT(.L_cache_w1,.L_fix_cache_w1)
> + _ASM_EXTABLE_FAULT(.L_cache_w3,.L_fix_cache_w2)
> + _ASM_EXTABLE_FAULT(.L_cache_w3,.L_fix_cache_w3)
> + _ASM_EXTABLE_FAULT(.L_cache_w4,.L_fix_cache_w4)
> + _ASM_EXTABLE_FAULT(.L_cache_w5,.L_fix_cache_w5)
> + _ASM_EXTABLE_FAULT(.L_cache_w6,.L_fix_cache_w6)
> + _ASM_EXTABLE_FAULT(.L_cache_w7,.L_fix_cache_w7)
> + _ASM_EXTABLE_FAULT(.L_copy_trailing_words,.L_fix_trailing_words)
> + _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes,.L_fix_trailing_bytes)
> +#endif

Ok, I absolutely love this assembly code, it's already a lot easier to read than
95% of the x86 assembly code we have today!

There's two minor things I've noticed:

1) please put a space between instruction operands, i.e.:

- shl $6,%ecx
+ shl $6, %ecx

2)

There's a way to make the exception fixup stubs more readable, by aligning them
vertically, via something like:

.L_fix_cache_w0: shl $6, %ecx; add %ecx, %edx; ret
.L_fix_cache_w1: shl $6, %ecx; lea -8(%ecx,%edx), %edx; ret
.L_fix_cache_w2: shl $6, %ecx; lea -16(%ecx,%edx), %edx; ret
.L_fix_cache_w3: shl $6, %ecx; lea -24(%ecx,%edx), %edx; ret
.L_fix_cache_w4: shl $6, %ecx; lea -32(%ecx,%edx), %edx; ret
.L_fix_cache_w5: shl $6, %ecx; lea -40(%ecx,%edx), %edx; ret
.L_fix_cache_w6: shl $6, %ecx; lea -48(%ecx,%edx), %edx; ret
.L_fix_cache_w7: shl $6, %ecx; lea -56(%ecx,%edx), %edx; ret

this also makes it a lot easier to check the correctness of the fixup stubs. Also
this layout makes it clear that the first fixup stub uses 'ADD', while the others
use LEA, etc.

Thanks,

Ingo