Re: [PATCH v2 07/15] x86/lib/copy_user_64.S: Convert to ALTERNATIVE_2

From: Borislav Petkov
Date: Wed Mar 04 2015 - 04:08:01 EST


On Wed, Mar 04, 2015 at 08:13:24AM +0100, Ingo Molnar wrote:
> Btw., the x86 memset() variants are using this today, and I think this
> is the most optimal jump-patching variant, even if it means a small
> amount of code duplication between the copy_user variants.

Yeah, the problem with that one was that we patch a huge amount of code,
see the dump below.

The X86_FEATURE_REP_GOOD thing replaces a 172-byte memset with the 42-byte
REP;STOSQ version and the X86_FEATURE_ERMS one does the same with the
15-byte REP;STOSB version.

In the REP_GOOD case, the remaining 172 - 42 = 130 bytes at the end got
padded with NOPs.

I've changed those to do simple JMPs for now, so that we're working
with far fewer bytes at patching time (5, or 2 if we're lucky). The next
step would be to do what you suggest and simply CALL the respective
variants at the call sites directly.

old insn VA: 0xffffffff812c4060, CPU feat: X86_FEATURE_REP_GOOD, size: 172
memset:

ffffffff812c4060 <memset>:
ffffffff812c4060: 49 89 fa mov %rdi,%r10
ffffffff812c4063: 40 0f b6 ce movzx %sil,%ecx
ffffffff812c4067: 48 b8 01 01 01 01 01 mov $0x101010101010101,%rax
ffffffff812c406e: 01 01 01
ffffffff812c4071: 48 0f af c1 imul %rcx,%rax
ffffffff812c4075: 41 89 f9 mov %edi,%r9d
ffffffff812c4078: 41 83 e1 07 and $0x7,%r9d
ffffffff812c407c: 75 70 jne ffffffff812c40ee
ffffffff812c407e: 48 89 d1 mov %rdx,%rcx
ffffffff812c4081: 48 c1 e9 06 shr $0x6,%rcx
ffffffff812c4085: 74 39 jz ffffffff812c40c0
ffffffff812c4087: 66 0f 1f 84 00 00 00 nop 0x0(%rax,%rax,1)
ffffffff812c408e: 00 00
ffffffff812c4090: 48 ff c9 dec %rcx
ffffffff812c4093: 48 89 07 mov %rax,(%rdi)
ffffffff812c4096: 48 89 47 08 mov %rax,0x8(%rdi)
ffffffff812c409a: 48 89 47 10 mov %rax,0x10(%rdi)
ffffffff812c409e: 48 89 47 18 mov %rax,0x18(%rdi)
ffffffff812c40a2: 48 89 47 20 mov %rax,0x20(%rdi)
ffffffff812c40a6: 48 89 47 28 mov %rax,0x28(%rdi)
ffffffff812c40aa: 48 89 47 30 mov %rax,0x30(%rdi)
ffffffff812c40ae: 48 89 47 38 mov %rax,0x38(%rdi)
ffffffff812c40b2: 48 8d 7f 40 lea 0x40(%rdi),%rdi
ffffffff812c40b6: 75 d8 jne ffffffff812c4090
ffffffff812c40b8: 0f 1f 84 00 00 00 00 nop 0x0(%rax,%rax,1)
ffffffff812c40bf: 00
ffffffff812c40c0: 89 d1 mov %edx,%ecx
ffffffff812c40c2: 83 e1 38 and $0x38,%ecx
ffffffff812c40c5: 74 14 jz ffffffff812c40db
ffffffff812c40c7: c1 e9 03 shr $0x3,%ecx
ffffffff812c40ca: 66 0f 1f 44 00 00 nop 0x0(%rax,%rax,1)
ffffffff812c40d0: ff c9 dec %ecx
ffffffff812c40d2: 48 89 07 mov %rax,(%rdi)
ffffffff812c40d5: 48 8d 7f 08 lea 0x8(%rdi),%rdi
ffffffff812c40d9: 75 f5 jne ffffffff812c40d0
ffffffff812c40db: 83 e2 07 and $0x7,%edx
ffffffff812c40de: 74 0a jz ffffffff812c40ea
ffffffff812c40e0: ff ca dec %edx
ffffffff812c40e2: 88 07 mov %al,(%rdi)
ffffffff812c40e4: 48 8d 7f 01 lea 0x1(%rdi),%rdi
ffffffff812c40e8: 75 f6 jne ffffffff812c40e0
ffffffff812c40ea: 4c 89 d0 mov %r10,%rax
ffffffff812c40ed: c3 retq
ffffffff812c40ee: 48 83 fa 07 cmp $0x7,%rdx
ffffffff812c40f2: 76 e7 jbe ffffffff812c40db
ffffffff812c40f4: 48 89 07 mov %rax,(%rdi)
ffffffff812c40f7: 49 c7 c0 08 00 00 00 mov $0x8,%r8
ffffffff812c40fe: 4d 29 c8 sub %r9,%r8
ffffffff812c4101: 4c 01 c7 add %r8,%rdi
ffffffff812c4104: 4c 29 c2 sub %r8,%rdx
ffffffff812c4107: e9 72 ff ff ff jmpq ffffffff812c407e
repl insn: 0xffffffff81e1d68d, size: 42
ffffffff81e1d68d: 49 89 f9 mov %rdi,%r9
ffffffff81e1d690: 48 89 d1 mov %rdx,%rcx
ffffffff81e1d693: 83 e2 07 and $0x7,%edx
ffffffff81e1d696: 48 c1 e9 03 shr $0x3,%rcx
ffffffff81e1d69a: 40 0f b6 f6 movzx %sil,%esi
ffffffff81e1d69e: 48 b8 01 01 01 01 01 mov $0x101010101010101,%rax
ffffffff81e1d6a5: 01 01 01
ffffffff81e1d6a8: 48 0f af c6 imul %rsi,%rax
ffffffff81e1d6ac: f3 48 ab rep stos %rax,%es:(%rdi)
ffffffff81e1d6af: 89 d1 mov %edx,%ecx
ffffffff81e1d6b1: f3 aa rep stos %al,%es:(%rdi)
ffffffff81e1d6b3: 4c 89 c8 mov %r9,%rax
ffffffff81e1d6b6: c3 retq

old insn VA: 0xffffffff812c4060, CPU feat: X86_FEATURE_ERMS, size: 172
memset:

ffffffff812c4060 <memset>:
ffffffff812c4060: 49 89 fa mov %rdi,%r10
ffffffff812c4063: 40 0f b6 ce movzx %sil,%ecx
ffffffff812c4067: 48 b8 01 01 01 01 01 mov $0x101010101010101,%rax
ffffffff812c406e: 01 01 01
ffffffff812c4071: 48 0f af c1 imul %rcx,%rax
ffffffff812c4075: 41 89 f9 mov %edi,%r9d
ffffffff812c4078: 41 83 e1 07 and $0x7,%r9d
ffffffff812c407c: 75 70 jne ffffffff812c40ee
ffffffff812c407e: 48 89 d1 mov %rdx,%rcx
ffffffff812c4081: 48 c1 e9 06 shr $0x6,%rcx
ffffffff812c4085: 74 39 jz ffffffff812c40c0
ffffffff812c4087: 66 0f 1f 84 00 00 00 nop 0x0(%rax,%rax,1)
ffffffff812c408e: 00 00
ffffffff812c4090: 48 ff c9 dec %rcx
ffffffff812c4093: 48 89 07 mov %rax,(%rdi)
ffffffff812c4096: 48 89 47 08 mov %rax,0x8(%rdi)
ffffffff812c409a: 48 89 47 10 mov %rax,0x10(%rdi)
ffffffff812c409e: 48 89 47 18 mov %rax,0x18(%rdi)
ffffffff812c40a2: 48 89 47 20 mov %rax,0x20(%rdi)
ffffffff812c40a6: 48 89 47 28 mov %rax,0x28(%rdi)
ffffffff812c40aa: 48 89 47 30 mov %rax,0x30(%rdi)
ffffffff812c40ae: 48 89 47 38 mov %rax,0x38(%rdi)
ffffffff812c40b2: 48 8d 7f 40 lea 0x40(%rdi),%rdi
ffffffff812c40b6: 75 d8 jne ffffffff812c4090
ffffffff812c40b8: 0f 1f 84 00 00 00 00 nop 0x0(%rax,%rax,1)
ffffffff812c40bf: 00
ffffffff812c40c0: 89 d1 mov %edx,%ecx
ffffffff812c40c2: 83 e1 38 and $0x38,%ecx
ffffffff812c40c5: 74 14 jz ffffffff812c40db
ffffffff812c40c7: c1 e9 03 shr $0x3,%ecx
ffffffff812c40ca: 66 0f 1f 44 00 00 nop 0x0(%rax,%rax,1)
ffffffff812c40d0: ff c9 dec %ecx
ffffffff812c40d2: 48 89 07 mov %rax,(%rdi)
ffffffff812c40d5: 48 8d 7f 08 lea 0x8(%rdi),%rdi
ffffffff812c40d9: 75 f5 jne ffffffff812c40d0
ffffffff812c40db: 83 e2 07 and $0x7,%edx
ffffffff812c40de: 74 0a jz ffffffff812c40ea
ffffffff812c40e0: ff ca dec %edx
ffffffff812c40e2: 88 07 mov %al,(%rdi)
ffffffff812c40e4: 48 8d 7f 01 lea 0x1(%rdi),%rdi
ffffffff812c40e8: 75 f6 jne ffffffff812c40e0
ffffffff812c40ea: 4c 89 d0 mov %r10,%rax
ffffffff812c40ed: c3 retq
ffffffff812c40ee: 48 83 fa 07 cmp $0x7,%rdx
ffffffff812c40f2: 76 e7 jbe ffffffff812c40db
ffffffff812c40f4: 48 89 07 mov %rax,(%rdi)
ffffffff812c40f7: 49 c7 c0 08 00 00 00 mov $0x8,%r8
ffffffff812c40fe: 4d 29 c8 sub %r9,%r8
ffffffff812c4101: 4c 01 c7 add %r8,%rdi
ffffffff812c4104: 4c 29 c2 sub %r8,%rdx
ffffffff812c4107: e9 72 ff ff ff jmpq ffffffff812c407e
repl insn: 0xffffffff81e1d6b7, size: 15
ffffffff81e1d6b7: 49 89 f9 mov %rdi,%r9
ffffffff81e1d6ba: 40 88 f0 mov %sil,%al
ffffffff81e1d6bd: 48 89 d1 mov %rdx,%rcx
ffffffff81e1d6c0: f3 aa rep stos %al,%es:(%rdi)
ffffffff81e1d6c2: 4c 89 c8 mov %r9,%rax
ffffffff81e1d6c5: c3 retq

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/