My old, longstanding patch is still valid against 2.3.17, and IMHO it's
better to have a solid checksum routine than to save a few CPU cycles. I
don't like optimizations done at the expense of robustness (a rough C
sketch of the safer byte-tail handling follows the patch below).
Enlarging the unrolled loop in the copy-and-checksum routine is still a
good idea IMO, since jumping into the middle of it means small buffers only
touch the instruction cachelines they actually need.
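For anyone who hasn't seen the trick, here is a rough C-level sketch of the
idea (sum_words() and its word-at-a-time arithmetic are invented for
illustration, not the kernel routine; the real code is the assembler
below). The switch plays the role of the "jmp *%ebx": it enters the
unrolled body at an offset computed from the length, so a short buffer
executes, and pulls into the I-cache, only the tail of the unrolled body.

	#include <stddef.h>

	/* Duff's-device style entry into an 8-way unrolled loop. */
	unsigned long sum_words(const unsigned long *p, size_t n)
	{
		unsigned long sum = 0;
		size_t passes = (n + 7) / 8;	/* full passes through the body */

		if (n == 0)
			return 0;

		switch (n % 8) {	/* computed entry point, like "jmp *%ebx" */
		case 0: do {	sum += *p++;
		case 7:		sum += *p++;
		case 6:		sum += *p++;
		case 5:		sum += *p++;
		case 4:		sum += *p++;
		case 3:		sum += *p++;
		case 2:		sum += *p++;
		case 1:		sum += *p++;
			} while (--passes > 0);
		}
		return sum;
	}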
--- linux/arch/i386/lib/checksum.S 1999/05/18 20:17:13 1.1.1.3
+++ linux/arch/i386/lib/checksum.S 1999/05/24 01:40:00
@@ -18,6 +18,8 @@
* handling.
* Andi Kleen, add zeroing on error
* converted to pure assembler
+ * Andrea Arcangeli, fixed a potential buffer overflow in the
+ * 686 csum_partial, and some improvement in the 686 code.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -123,9 +125,6 @@
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: const unsigned char *buf
- testl $2, %esi
- jnz 30f
-10:
movl %ecx, %edx
movl %ecx, %ebx
andl $0x7c, %ebx
@@ -134,27 +133,9 @@
shrl $2, %ebx
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
- testl %esi, %esi
+ clc
jmp *%ebx
- # Handle 2-byte-aligned regions
-20: addw (%esi), %ax
- lea 2(%esi), %esi
- adcl $0, %eax
- jmp 10b
-
-30: subl $2, %ecx
- ja 20b
- je 32f
- movzbl (%esi),%ebx # csumming 1 byte, 2-aligned
- addl %ebx, %eax
- adcl $0, %eax
- jmp 80f
-32:
- addw (%esi), %ax # csumming 2 bytes, 2-aligned
- adcl $0, %eax
- jmp 80f
-
40:
addl -128(%esi), %eax
adcl -124(%esi), %eax
@@ -193,18 +174,20 @@
adcl $0, %eax
dec %ecx
jge 40b
- movl %edx, %ecx
-50: andl $3, %ecx
+50:
+ testl $3, %edx
jz 80f
-
- # Handle the last 1-3 bytes without jumping
- notl %ecx # 1->2, 2->1, 3->0, higher bits are masked
- movl $0xffffff,%ebx # by the shll and shrl instructions
- shll $3,%ecx
- shrl %cl,%ebx
- andl -128(%esi),%ebx # esi is 4-aligned so should be ok
- addl %ebx,%eax
+ testl $2, %edx
+ jz 70f
+ addw -128(%esi), %ax
+ lea 2(%esi), %esi
adcl $0,%eax
+70:
+ testl $1, %edx
+ jz 80f
+ movzbl -128(%esi), %ebx
+ addl %ebx, %eax
+ adcl $0, %eax
80:
popl %ebx
popl %esi
@@ -393,36 +376,44 @@
movl ARGBASE+16(%esp),%eax #sum
movl %ecx, %edx
movl %ecx, %ebx
- shrl $6, %ecx
- andl $0x3c, %ebx
+ andl $0x7c, %ebx
+ shrl $7, %ecx
+ addl %ebx,%esi
+ addl %ebx,%edi
+ shrl $2, %ebx
negl %ebx
- subl %ebx, %esi
- subl %ebx, %edi
- lea 3f(%ebx,%ebx), %ebx
- testl %esi, %esi
+ shll $3, %ebx
+ leal 3f(%ebx), %ebx
+ clc
jmp *%ebx
-1: addl $64,%esi
- addl $64,%edi
- ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
+1:
+ ROUND1(-128) ROUND(-124) ROUND(-120) ROUND(-116)
+ ROUND (-112) ROUND(-108) ROUND(-104) ROUND(-100)
+ ROUND (-96) ROUND(-92) ROUND(-88) ROUND(-84)
+ ROUND (-80) ROUND(-76) ROUND(-72) ROUND(-68)
+ ROUND (-64) ROUND(-60) ROUND(-56) ROUND(-52)
ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4)
-3: adcl $0,%eax
+3:
+ leal 128(%esi), %esi
+ adcl $0,%eax
+ leal 128(%edi), %edi
dec %ecx
jge 1b
4: andl $3, %edx
jz 7f
cmpl $2, %edx
jb 5f
-SRC( movw (%esi), %dx )
+SRC( movw -128(%esi), %dx )
leal 2(%esi), %esi
-DST( movw %dx, (%edi) )
+DST( movw %dx, -128(%edi) )
leal 2(%edi), %edi
je 6f
shll $16,%edx
5:
-SRC( movb (%esi), %dl )
-DST( movb %dl, (%edi) )
+SRC( movb -128(%esi), %dl )
+DST( movb %dl, -128(%edi) )
6: addl %edx, %eax
adcl $0, %eax
7:
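The overflow fix itself boils down to not loading a whole 32-bit word and
masking it when only 1-3 bytes of the buffer are left. A hedged C sketch of
the tail handling the new code uses (csum_tail() is my own name, not a
kernel interface; the real code accumulates in %eax and folds carries with
adcl after every add, while here they simply pile up in the high half of a
32-bit accumulator that the caller folds down to 16 bits later):

	#include <stdint.h>
	#include <stddef.h>
	#include <string.h>

	static uint32_t csum_tail(const unsigned char *p, size_t len,
				  uint32_t sum)
	{
		if (len & 2) {			/* like "testl $2, %edx" */
			uint16_t w;

			memcpy(&w, p, sizeof(w));  /* 16-bit load, never past the end */
			sum += w;
			p += 2;
		}
		if (len & 1)			/* like "testl $1, %edx" */
			sum += *p;		/* trailing odd byte, low position as on x86 */
		return sum;
	}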
Andrea