Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()
From: Zhaoxiu Zeng
Date: Thu Jul 26 2018 - 13:17:46 EST
在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
>> From: Zhaoxiu Zeng <zhaoxiu.zeng@xxxxxxxxx>
>>
>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
>> For the Sunday algorithm, to see
>> http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
>
> So you say, but what does this really buy us? Why make this change?
> How was it tested? What is the downside of not taking this?
>
> thanks,
>
> greg k-h
>
I use the following program to test on fc28.
Compile with O2, the new version is almost 2X faster than the original.
The code size of the original is 0x80, the newer is 0xB0.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/**
* strstr - Find the first substring in a %NUL terminated string
* @s1: The string to be searched
* @s2: The string to search for
*/
char *strstr1(const char *s1, const char *s2)
{
size_t l1, l2;
l2 = strlen(s2);
if (!l2)
return (char *)s1;
l1 = strlen(s1);
while (l1 >= l2) {
l1--;
if (!memcmp(s1, s2, l2))
return (char *)s1;
s1++;
}
return NULL;
}
char *strstr2(const char *s1, const char *s2)
{
size_t l2;
const char *pchk = s1;
#if 1//def __HAVE_ARCH_STRLEN
l2 = strlen(s2);
#else
for (l2 = 0; s2[l2] != 0; l2++)
/* nothing */;
#endif
if (!l2)
return (char *)s1;
/*
* Sunday matching
*/
while (1) {
size_t i;
char k;
/* compare */
for (i = 0; i < l2; i++) {
if (s1[i] != s2[i])
break;
}
if (i >= l2)
return (char *)s1;
/* if s1 terminate early? */
if (pchk < s1 + i)
pchk = s1 + i;
do {
k = *pchk;
if (k == 0)
return NULL;
} while (pchk++ != s1 + l2);
/* find the k's last presence in s2 (k = s1[l2]) */
for (i = l2; --i != (size_t)-1; ) {
if (k == s2[i])
break;
}
/* shift */
s1 += l2 - i;
}
}
#ifdef __i386__
# define RDTSC_CLOBBER "%eax", "%ebx", "%ecx", "%edx"
#elif __x86_64__
# define RDTSC_CLOBBER "%rax", "%rbx", "%rcx", "%rdx"
#else
# error unknown platform
#endif
#define RDTSC_START(cycles) \
do { \
register unsigned cyc_high, cyc_low; \
asm volatile("CPUID\n\t" \
"RDTSC\n\t" \
"mov %%edx, %0\n\t" \
"mov %%eax, %1\n\t" \
: "=r" (cyc_high), "=r" (cyc_low) \
:: RDTSC_CLOBBER); \
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
} while (0)
#define RDTSC_STOP(cycles) \
do { \
register unsigned cyc_high, cyc_low; \
asm volatile("RDTSCP\n\t" \
"mov %%edx, %0\n\t" \
"mov %%eax, %1\n\t" \
"CPUID\n\t" \
: "=r" (cyc_high), "=r" (cyc_low) \
:: RDTSC_CLOBBER); \
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
} while (0)
typedef unsigned long long TIME_T;
#define start_time(start) RDTSC_START(start)
#define stop_time(stop) RDTSC_STOP(stop)
#define diff_time(start, stop) (stop - start)
#define MAX_HAYSTACK_LENGTH 64
#define MAX_NEEDLE_LENGTH 16
#define TEST_LOOPS 1000
char haystack[MAX_HAYSTACK_LENGTH + 1];
char needle[MAX_NEEDLE_LENGTH + 1];
int main(int argc, char **argv)
{
size_t i, j, n;
void *p1, *p2;
srand(time(0));
for (i = 0; i < MAX_HAYSTACK_LENGTH; ) {
haystack[i] = rand();
if (haystack[i] != 0)
i++;
}
haystack[i] = 0;
for (i = 0; i < MAX_HAYSTACK_LENGTH; i++) {
TIME_T start, stop;
unsigned long long elapsed1, elapsed2;
j = rand() % MAX_NEEDLE_LENGTH;
if (i <= MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j))
n = i;
else
n = MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j);
memcpy(needle + j, haystack + n, MAX_NEEDLE_LENGTH - j);
needle[MAX_NEEDLE_LENGTH] = 0;
elapsed1 = ~0UL;
for (n = 0; n < TEST_LOOPS; n++) {
start_time(start);
asm volatile("":::"memory");
p1 = strstr1(haystack, needle + j);
asm volatile("":::"memory");
stop_time(stop);
if (elapsed1 > diff_time(start, stop))
elapsed1 = diff_time(start, stop);
}
elapsed2 = ~0UL;
for (n = 0; n < TEST_LOOPS; n++) {
start_time(start);
asm volatile("":::"memory");
p2 = strstr2(haystack, needle + j);
asm volatile("":::"memory");
stop_time(stop);
if (elapsed2 > diff_time(start, stop))
elapsed2 = diff_time(start, stop);
}
if (p1 != p2)
fprintf(stderr, "Error: %p != %p\n", p1, p2);
else
fprintf(stdout, "%3d, %3d:\t%lld,\t%lld\n", i, j, elapsed1, elapsed2);
}
return 0;
}
The test result:
[zzx@fedora strstr]$ ./test
0, 3: 68, 68
1, 13: 74, 66
2, 2: 88, 94
3, 5: 96, 88
4, 8: 108, 80
5, 13: 110, 76
6, 12: 132, 82
7, 9: 142, 82
8, 11: 152, 88
9, 13: 146, 90
10, 14: 154, 94
11, 15: 132, 104
12, 1: 196, 114
13, 8: 206, 108
14, 14: 186, 110
15, 13: 194, 112
16, 0: 260, 124
17, 10: 258, 124
18, 15: 208, 136
19, 11: 276, 136
20, 13: 256, 128
21, 12: 292, 144
22, 11: 300, 142
23, 13: 278, 144
24, 7: 334, 152
25, 1: 346, 166
26, 6: 368, 168
27, 15: 278, 182
28, 1: 374, 168
29, 3: 382, 188
30, 8: 398, 172
31, 4: 402, 188
32, 0: 430, 180
33, 10: 398, 184
34, 10: 410, 192
35, 8: 434, 180
36, 7: 444, 198
37, 6: 454, 204
38, 1: 464, 220
39, 2: 472, 206
40, 4: 482, 210
41, 15: 388, 262
42, 2: 502, 210
43, 5: 510, 220
44, 8: 520, 214
45, 0: 564, 236
46, 3: 550, 242
47, 8: 552, 262
48, 10: 528, 270
49, 2: 572, 256
50, 3: 586, 246
51, 7: 590, 278
52, 14: 506, 308
53, 14: 514, 298
54, 4: 602, 240
55, 5: 626, 284
56, 15: 506, 316
57, 10: 606, 326
58, 4: 606, 240
59, 0: 596, 240
60, 13: 570, 316
61, 13: 576, 328
62, 5: 618, 284
63, 13: 576, 328
Thanks!