Re: New csum and csum_copy routines - and a test/benchmark program

From: Roberto Nibali (ratz@drugphish.ch)
Date: Mon Oct 28 2002 - 03:48:16 EST


Denis Vlasenko wrote:
> I took some time to develop a little test/benchmark program
> for csum and csum_copy routines (used in networking).
> It has grown to include the following features:

I needed the attached patch with changes to make it work on my machine.
Could you comment on it, please? Also, a Makefile would be nice ;).

Cheers,
Roberto Nibali, ratz

-- 
echo '[q]sa[ln0=aln256%Pln256/snlbx]sb3135071790101768542287578439snlbxq'|dc

diff -ur timing_csum_copy.3/copy_kpf.S timing_csum_copy.3-ratz/copy_kpf.S
--- timing_csum_copy.3/copy_kpf.S	Mon Oct 28 13:35:25 2002
+++ timing_csum_copy.3-ratz/copy_kpf.S	Mon Oct 28 09:41:11 2002
@@ -76,7 +76,7 @@
 	PREFETCH(128(%esi))
 	PREFETCH(192(%esi))
 	subl $108, %esp
-	fsave (%esp) # save FPU - we'll use MMX...
+	fsave (%esp) # save FPU - we will use MMX...
 	fwait
 	testl %esi, %esi # clears CF
 10:
diff -ur timing_csum_copy.3/copy_ntq.c timing_csum_copy.3-ratz/copy_ntq.c
--- timing_csum_copy.3/copy_ntq.c	Sun Oct 27 02:14:12 2002
+++ timing_csum_copy.3-ratz/copy_ntq.c	Mon Oct 28 09:32:49 2002
@@ -1,6 +1,7 @@
 unsigned int ntq_copy(const char *src, char *dst, int len, int sum,
 	int *src_err_ptr, int *dst_err_ptr)
 {
+	int count;
 	char fpu_save[108];
 	__asm__ __volatile__ (
 	" fsave %0\n"
@@ -8,7 +9,7 @@
 	: /* output */ "=m"(fpu_save[0])
 	);
 
-	int count = len/8;
+	count = len/8;
 	__asm__ __volatile__ (
 	" testl %%ecx, %%ecx\n" //carry unset - we need it
 	// these two back-to-back references actually _is_ the fastest way
diff -ur timing_csum_copy.3/copy_ntqpf.c timing_csum_copy.3-ratz/copy_ntqpf.c
--- timing_csum_copy.3/copy_ntqpf.c	Fri Oct 25 20:01:44 2002
+++ timing_csum_copy.3-ratz/copy_ntqpf.c	Mon Oct 28 09:33:08 2002
@@ -1,6 +1,7 @@
 unsigned int ntqpf_copy(const char *src, char *dst, int len, int sum,
 	int *src_err_ptr, int *dst_err_ptr)
 {
+	int count;
 	char fpu_save[108];
 	__asm__ __volatile__ (
 	" "PREFETCH" (%0)\n"
@@ -15,7 +16,7 @@
 	: /* output */ "=m"(fpu_save[0])
 	);
 
-	int count = len/(8*8);
+	count = len/(8*8);
 	while(count--) {
 		__asm__ __volatile__ (
 		"1: "PREFETCH" 256(%1)\n"
diff -ur timing_csum_copy.3/copy_ntqpf2.c timing_csum_copy.3-ratz/copy_ntqpf2.c
--- timing_csum_copy.3/copy_ntqpf2.c	Sun Oct 27 02:19:09 2002
+++ timing_csum_copy.3-ratz/copy_ntqpf2.c	Mon Oct 28 09:33:27 2002
@@ -1,6 +1,7 @@
 unsigned int ntqpf2_copy(const char *src, char *dst, int len, int sum,
 	int *src_err_ptr, int *dst_err_ptr)
 {
+	int count;
 	char fpu_save[108];
 	__asm__ __volatile__ (
 	" "PREFETCH" (%0)\n"
@@ -15,7 +16,7 @@
 	: /* output */ "=m"(fpu_save[0])
 	);
 
-	int count = len/(8*8);
+	count = len/(8*8);
 	while(count--) {
 		__asm__ __volatile__ (
 		"1: "PREFETCH" 256(%1)\n"
diff -ur timing_csum_copy.3/copy_ntqpfm.c timing_csum_copy.3-ratz/copy_ntqpfm.c
--- timing_csum_copy.3/copy_ntqpfm.c	Sun Oct 27 02:38:21 2002
+++ timing_csum_copy.3-ratz/copy_ntqpfm.c	Mon Oct 28 09:33:41 2002
@@ -1,6 +1,7 @@
 unsigned int ntqpfm_copy(const char *src, char *dst, int len, int sum,
 	int *src_err_ptr, int *dst_err_ptr)
 {
+	int count;
 	char xmm0[16];
 	__asm__ __volatile__ (
 	" "PREFETCH" (%0)\n"
@@ -14,7 +15,7 @@
 	: /* output */ "=m"(xmm0[0])
 	);
 
-	int count = len/(8*8);
+	count = len/(8*8);
 	while(count--) {
 		__asm__ __volatile__ (
 		"1: "PREFETCH" 256(%1)\n"
diff -ur timing_csum_copy.3/csum_kpf.S timing_csum_copy.3-ratz/csum_kpf.S
--- timing_csum_copy.3/csum_kpf.S	Mon Oct 28 13:35:30 2002
+++ timing_csum_copy.3-ratz/csum_kpf.S	Mon Oct 28 09:43:21 2002
@@ -73,7 +73,7 @@
 40:
 	PREFETCH(256(%esi))
 41:
-	addl/* -128(%esi), %eax
+	addl -128(%esi), %eax
 	adcl -124(%esi), %eax
 	adcl -120(%esi), %eax
 	adcl -116(%esi), %eax
@@ -97,7 +97,7 @@
 	adcl -44(%esi), %eax
 	adcl -40(%esi), %eax
 	adcl -36(%esi), %eax
-	adcl*/ -32(%esi), %eax
+	adcl -32(%esi), %eax
 	adcl -28(%esi), %eax
 	adcl -24(%esi), %eax
 	adcl -20(%esi), %eax
@@ -115,7 +115,7 @@
 	js 46f
 	cmp $8,%ecx
 	jae 40b # need prefetch
-	jmp 41b # don't need it
+	jmp 41b # do not need it
 46:
 	//adcl $0, %eax
 	movl %edx, %ecx
diff -ur timing_csum_copy.3/csum_pfm.c timing_csum_copy.3-ratz/csum_pfm.c
--- timing_csum_copy.3/csum_pfm.c	Sun Oct 27 02:41:17 2002
+++ timing_csum_copy.3-ratz/csum_pfm.c	Mon Oct 28 09:31:58 2002
@@ -1,5 +1,6 @@
 unsigned int pfm_csum(const unsigned char *src, int len, unsigned int sum)
 {
+	int count;
 	__asm__ __volatile__ (
 	" "PREFETCH" (%0)\n"
 	" "PREFETCH" 64(%0)\n"
@@ -8,8 +9,8 @@
 	: : "r" (src)
 	);
 
-	int count = len/(8*8);
-	while(count--) {
+	count = len/(8*8);
+	while(count--) {
 		__asm__ __volatile__ (
 		"1: "PREFETCH" 256(%1)\n"
 		" xorl %%ecx, %%ecx\n" //carry unset - we need it
diff -ur timing_csum_copy.3/csum_pfm2.c timing_csum_copy.3-ratz/csum_pfm2.c
--- timing_csum_copy.3/csum_pfm2.c	Sun Oct 27 02:42:04 2002
+++ timing_csum_copy.3-ratz/csum_pfm2.c	Mon Oct 28 09:32:27 2002
@@ -1,5 +1,6 @@
 unsigned int pfm2_csum(const unsigned char *src, int len, unsigned int sum)
 {
+	int count;
 	__asm__ __volatile__ (
 	" "PREFETCH" (%0)\n"
 	" "PREFETCH" 64(%0)\n"
@@ -8,8 +9,8 @@
 	: : "r" (src)
 	);
 
-	int count = len/(8*8);
-	while(count--) {
+	count = len/(8*8);
+	while(count--) {
 		__asm__ __volatile__ (
 		"1: "PREFETCH" 256(%1)\n"
 		" xorl %%ecx, %%ecx\n" //carry unset - we need it

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Thu Oct 31 2002 - 22:00:36 EST