Re: [RFC PATCH v2 1/4] tools/nolibc: x86-64: Use `rep movsb` for `memcpy()` and `memmove()`

From: Alviro Iskandar Setiawan
Date: Sat Sep 02 2023 - 02:08:23 EST


On Sat, Sep 2, 2023 at 12:51 PM Ammar Faizi wrote:
> +__asm__ (
> +".section .text.nolibc_memmove\n"
> +".weak memmove\n"
> +"memmove:\n"
> + "movq %rdx, %rcx\n"
> + "movq %rdi, %rdx\n"
> + "movq %rdi, %rax\n"
> + "subq %rsi, %rdx\n"
> + "cmpq %rcx, %rdx\n"
> + "jnb .Lforward_copy\n"
> + "leaq -1(%rdi, %rcx, 1), %rdi\n"
> + "leaq -1(%rsi, %rcx, 1), %rsi\n"
> + "std\n"
> + "rep movsb\n"
> + "cld\n"
> + "retq\n"
> +".Lforward_copy:\n"
> + "rep movsb\n"
> + "retq\n"
> +
> +".section .text.nolibc_memcpy\n"
> +".weak memcpy\n"
> +"memcpy:\n"
> + "movq %rdi, %rax\n"
> + "movq %rdx, %rcx\n"
> + "rep movsb\n"
> + "retq\n"
> +);

Btw, sir, this can be simplified further by merging the forward copy
path: memmove() and memcpy() can share it, so only two "rep movsb"
instructions in total should be enough for both functions?
```
/*
 * memmove() and memcpy() for x86-64, sharing one forward-copy tail.
 *
 * ABI:   System V AMD64, AT&T (GAS) syntax
 * In:    %rdi = dst, %rsi = src, %rdx = len
 * Out:   %rax = dst
 * Clobb: %rcx, %rdx, %rsi, %rdi, flags
 *
 * The direction flag is clear on entry (ABI requirement) and is clear again
 * on every return path.
 *
 * .pushsection/.popsection are used instead of a bare .section so that the
 * assembler's current section is restored after this top-level asm block;
 * the compiler does not know the block switched sections. The shared label
 * is a .L local so that it does not end up in the object's symbol table.
 */
__asm__ (
".pushsection .text.nolibc_memmove_memcpy, \"ax\", @progbits\n"
".weak memmove\n"
".weak memcpy\n"
"memmove:\n"
	"movq %rdx, %rcx\n"			/* rcx = len (count for rep movsb) */
	"movq %rdi, %rdx\n"
	"movq %rdi, %rax\n"			/* return value = dst */
	"subq %rsi, %rdx\n"			/* rdx = dst - src */
	"cmpq %rcx, %rdx\n"
	/*
	 * Unsigned (dst - src) >= len: dst does not start inside
	 * [src, src + len), so an ascending copy is safe. This also
	 * covers dst < src (the difference wraps to a huge value) and
	 * len == 0.
	 */
	"jnb .Lnolibc_forward_copy\n"
	/* dst overlaps the tail of src: copy descending from the last byte. */
	"leaq -1(%rdi, %rcx, 1), %rdi\n"
	"leaq -1(%rsi, %rcx, 1), %rsi\n"
	"std\n"
	"rep movsb\n"
	"cld\n"					/* restore DF = 0 before returning */
	"retq\n"

"memcpy:\n"
	"movq %rdi, %rax\n"			/* return value = dst */
	"movq %rdx, %rcx\n"			/* rcx = len */
/* memmove jumps here with %rax and %rcx already set up. */
".Lnolibc_forward_copy:\n"
	"rep movsb\n"
	"retq\n"
".popsection\n"
);
```
Thoughts?

-- Viro