Different Memcpy system (see memcpy by 8)

Alex K. (kolex@erols.com)
Sun, 16 May 1999 20:56:00 -0400


This is a multi-part message in MIME format.
--------------9FA3A7A71FD58741142753C5
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

I am assuming that if you are reading this, you have read the earlier
"memcpy by 8" posting. I have written a test program in C (I checked the
generated assembly, and it looks about right; there is a little room for
optimization, but not much at -O2). For an N-byte memset it first fills
N/8 (rounded down) 8-byte blocks using pairs of dword stores, and then
fills the remaining 0-7 bytes one character at a time. The second test
program uses a modified version of the method in string-486.h, which
picks the store width based on the array size (memsetby4, memsetby2,
etc.). For odd lengths, the first algorithm outperforms the second by
far; for lengths that are a multiple of 2 but not of 4, it wins by
almost 50%; and for multiples of 4 and higher powers of two, the two
perform about the same. See
the graph at
<http://www.mbhs.edu/~akhripin/results.jpg>
(Ratio 1 is for the first algorithm, ratio 2 is for the second)
and the test output at
<http://www.mbhs.edu/~akhripin/results.txt>
The two programs are attached.
I think that it would be useful to add this as a supplementary function
to the kernel library under a different name. It has more overhead, so
it would only be useful in certain applications (where there is a high
chance of an odd length). This same thing could also be used for memcpy
and memcmp, though memcmp would be tricky to do on the limited number of
x86 registers.
Someone tell me what they think. Am I a raving moron?

Regards,
Alex Khripin
NOTE: the programs were run using a shell script, not included because
it's not relevant.

--------------9FA3A7A71FD58741142753C5
Content-Type: text/plain; charset=us-ascii;
name="memsettest.c"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="memsettest.c"

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * setmem - fill the first len bytes of buf with the byte VAR.
 *
 * The bulk of the buffer is written in 8-byte steps, each step being two
 * 4-byte word stores; the trailing len % 8 bytes are then written one at
 * a time.  A len of zero or less writes nothing.
 *
 * Assumes unsigned int is 4 bytes wide and that buf is suitably aligned
 * for unsigned int stores (e.g. memory returned by malloc).
 *
 * Declared static: under C99 and later a plain `inline` definition
 * provides no external definition, so a call the compiler chooses not to
 * inline would fail to link.
 */
static inline void setmem(void *buf, int len, unsigned char VAR)
{
	unsigned char *cur = buf;	/* byte pointer: void* arithmetic is a GNU extension */
	unsigned char *end;
	unsigned char *bulk_end;
	unsigned int word;

	if (len <= 0) {
		return;
	}

	end = cur + len;
	bulk_end = end - len % 8;

	/*
	 * Replicate VAR into all four bytes.  Done in unsigned arithmetic:
	 * shifting a promoted int left into the sign bit is undefined.
	 */
	word = VAR;
	word |= word << 8;
	word |= word << 16;

	while (cur < bulk_end) {
		*(unsigned int *)cur = word;
		cur += 4;
		*(unsigned int *)cur = word;
		cur += 4;
	}

	/* Remaining 0-7 bytes. */
	while (cur < end) {
		*cur++ = VAR;
	}
}
/*
 * Benchmark driver.
 *
 * Reads "<len> <var>" from stdin, fills a len-byte buffer with the byte
 * value var 10,000,000 times via setmem(), then prints the elapsed
 * wall-clock time in whole seconds followed by an empty line.
 *
 * Returns 0 on success, 1 on malformed input or allocation failure.
 */
int main(void)
{
	int len;
	int var;
	int tstcounter;
	unsigned char *tstr;
	time_t start_time;

	if (scanf("%d %d", &len, &var) != 2 || len < 0) {
		fprintf(stderr, "expected \"<len> <var>\" on stdin, with len >= 0\n");
		return 1;
	}

	/* Request at least one byte so that NULL always means failure. */
	tstr = malloc(len > 0 ? (size_t)len : 1);
	if (tstr == NULL) {
		fprintf(stderr, "out of memory\n");
		return 1;
	}

	/* Start the clock after allocation so only the fill loop is timed. */
	start_time = time(NULL);
	for (tstcounter = 0; tstcounter < 10000000; tstcounter++) {
		setmem(tstr, len, (unsigned char)var);
	}

	/*
	 * time_t has an implementation-defined type; convert through
	 * difftime() so the argument really matches the %u conversion.
	 */
	printf("%u\n", (unsigned int)difftime(time(NULL), start_time));
	printf("\n");
	free(tstr);
	return 0;
}

--------------9FA3A7A71FD58741142753C5
Content-Type: text/plain; charset=us-ascii;
name="memsettest2.c"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="memsettest2.c"

#include <stdio.h>
#include <time.h>

#include <stdio.h>
#include <time.h>

/*
 * setmem - fill the first len bytes of buf with the byte VAR, choosing
 * the store width from the length:
 *
 *   len divisible by 16 -> four 4-byte stores per iteration
 *   len divisible by 4  -> one 4-byte store per iteration
 *   len divisible by 2  -> one 2-byte store per iteration
 *   otherwise           -> one byte per iteration
 *
 * A len of zero or less writes nothing.
 *
 * Assumes unsigned int is 4 bytes and unsigned short is 2 bytes wide, and
 * that buf is suitably aligned for those stores (e.g. memory returned by
 * malloc).
 *
 * Declared static: under C99 and later a plain `inline` definition
 * provides no external definition, so a call the compiler chooses not to
 * inline would fail to link.
 */
static inline void setmem(void *buf, int len, unsigned char VAR)
{
	unsigned char *cur = buf;	/* byte pointer: void* arithmetic is a GNU extension */
	unsigned char *end;
	unsigned int word;

	if (len <= 0) {
		return;
	}

	end = cur + len;

	/*
	 * Replicate VAR into two and then four bytes.  Done in unsigned
	 * arithmetic: shifting a promoted int left into the sign bit is
	 * undefined.
	 */
	word = VAR;
	word |= word << 8;

	if (len % 16 == 0) {
		word |= word << 16;
		while (cur < end) {
			*(unsigned int *)cur = word;
			cur += 4;
			*(unsigned int *)cur = word;
			cur += 4;
			*(unsigned int *)cur = word;
			cur += 4;
			*(unsigned int *)cur = word;
			cur += 4;
		}
	} else if (len % 4 == 0) {
		word |= word << 16;
		while (cur < end) {
			*(unsigned int *)cur = word;
			cur += 4;
		}
	} else if (len % 2 == 0) {
		while (cur < end) {
			*(unsigned short *)cur = (unsigned short)word;
			cur += 2;
		}
	} else {
		while (cur < end) {
			*cur++ = VAR;
		}
	}
}
/*
 * Benchmark driver.
 *
 * Reads "<len> <var>" from stdin, fills a len-byte buffer with the byte
 * value var 10,000,000 times via setmem(), then prints the elapsed
 * wall-clock time in whole seconds followed by an empty line.
 *
 * Returns 0 on success, 1 on malformed input or allocation failure.
 *
 * NOTE(review): malloc() and free() are declared in <stdlib.h>, which
 * this attachment's include list does not name; it should be added.
 */
int main(void)
{
	int len;
	int var;
	int tstcounter;
	unsigned char *tstr;
	time_t start_time;

	if (scanf("%d %d", &len, &var) != 2 || len < 0) {
		fprintf(stderr, "expected \"<len> <var>\" on stdin, with len >= 0\n");
		return 1;
	}

	/* Request at least one byte so that NULL always means failure. */
	tstr = malloc(len > 0 ? (size_t)len : 1);
	if (tstr == NULL) {
		fprintf(stderr, "out of memory\n");
		return 1;
	}

	/* Start the clock after allocation so only the fill loop is timed. */
	start_time = time(NULL);
	for (tstcounter = 0; tstcounter < 10000000; tstcounter++) {
		setmem(tstr, len, (unsigned char)var);
	}

	/*
	 * time_t has an implementation-defined type; convert through
	 * difftime() so the argument really matches the %u conversion.
	 */
	printf("%u\n", (unsigned int)difftime(time(NULL), start_time));
	printf("\n");
	free(tstr);
	return 0;
}

--------------9FA3A7A71FD58741142753C5--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/