They were correct, but since then nobody has stepped up to make the
patch. So here it is. This patch also deletes an unnecessary `test'
instruction in each of the checksum functions, and standardizes on
`$0x1c' instead of `$28' as an AND mask.
In out-of-kernel testing, this patch has no effect on performance on
my 486/66, and speeds up my Pentium system by 0% to 25% depending on
luck with the cache.
I have been running with this patch for at least a month with SLIP and
Ethernet connections.
Tom.
--- linux/arch/i386/lib/checksum.c.0 Wed Sep 27 00:57:18 1995
+++ linux/arch/i386/lib/checksum.c Fri Sep 13 08:56:14 1996
@@ -70,8 +70,7 @@
2: movl %%edx, %%ecx
andl $0x1c, %%edx
je 4f
- shrl $2, %%edx
- testl %%esi, %%esi
+ shrl $2, %%edx # This clears CF
3: adcl (%%esi), %%eax
lea 4(%%esi), %%esi
dec %%edx
@@ -159,10 +158,9 @@
jne 1b
adcl $0, %%eax
2: movl %%edx, %%ecx
- andl $28, %%edx
+ andl $0x1c, %%edx
je 4f
- shrl $2, %%edx
- testl %%esi, %%esi
+ shrl $2, %%edx # This clears CF
3: movl %%fs:(%%esi), %%ebx
adcl %%ebx, %%eax
movl %%ebx, (%%edi)
@@ -212,52 +210,48 @@
addw %%bx, %%ax
adcl $0, %%eax
2:
- movl %%ecx, %%edx
+ pushl %%ecx
shrl $5, %%ecx
jz 2f
testl %%esi, %%esi
1: movl (%%esi), %%ebx
+ movl 4(%%esi), %%edx
adcl %%ebx, %%eax
movl %%ebx, (%%edi)
-
- movl 4(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl %%ebx, 4(%%edi)
+ adcl %%edx, %%eax
+ movl %%edx, 4(%%edi)
movl 8(%%esi), %%ebx
+ movl 12(%%esi), %%edx
adcl %%ebx, %%eax
movl %%ebx, 8(%%edi)
-
- movl 12(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl %%ebx, 12(%%edi)
+ adcl %%edx, %%eax
+ movl %%edx, 12(%%edi)
movl 16(%%esi), %%ebx
+ movl 20(%%esi), %%edx
adcl %%ebx, %%eax
movl %%ebx, 16(%%edi)
-
- movl 20(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl %%ebx, 20(%%edi)
+ adcl %%edx, %%eax
+ movl %%edx, 20(%%edi)
movl 24(%%esi), %%ebx
+ movl 28(%%esi), %%edx
adcl %%ebx, %%eax
movl %%ebx, 24(%%edi)
-
- movl 28(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl %%ebx, 28(%%edi)
+ adcl %%edx, %%eax
+ movl %%edx, 28(%%edi)
lea 32(%%esi), %%esi
lea 32(%%edi), %%edi
dec %%ecx
jne 1b
adcl $0, %%eax
-2: movl %%edx, %%ecx
- andl $28, %%edx
+2: popl %%edx
+ movl %%edx, %%ecx
+ andl $0x1c, %%edx
je 4f
- shrl $2, %%edx
- testl %%esi, %%esi
+ shrl $2, %%edx # This clears CF
3: movl (%%esi), %%ebx
adcl %%ebx, %%eax
movl %%ebx, (%%edi)
That's all.