Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and strnstr()

From: Zhaoxiu Zeng
Date: Fri Jul 27 2018 - 02:06:13 EST


在 2018/7/27 1:17, Zhaoxiu Zeng 写道:
> 在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
>> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
>>> From: Zhaoxiu Zeng <zhaoxiu.zeng@xxxxxxxxx>
>>>
>>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
>>> For the Sunday algorithm, to see
>>> http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
>>
>> So you say, but what does this really buy us? Why make this change?
>> How was it tested? What is the downside of not taking this?
>>
>> thanks,
>>
>> greg k-h
>>
>
> I use the following program to test on fc28.
> Compile with O2, the new version is almost 2X faster than the original.
> The code size of the original is 0x80, the newer is 0xB0.
>
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <stdint.h>
> #include <string.h>
> #include <time.h>
> #include <unistd.h>
>
> /**
> * strstr - Find the first substring in a %NUL terminated string
> * @s1: The string to be searched
> * @s2: The string to search for
> */
> char *strstr1(const char *s1, const char *s2)
> {
> size_t l1, l2;
>
> l2 = strlen(s2);
> if (!l2)
> return (char *)s1;
> l1 = strlen(s1);
> while (l1 >= l2) {
> l1--;
> if (!memcmp(s1, s2, l2))
> return (char *)s1;
> s1++;
> }
> return NULL;
> }
>
> char *strstr2(const char *s1, const char *s2)
> {
> size_t l2;
> const char *pchk = s1;
>
> #if 1//def __HAVE_ARCH_STRLEN
> l2 = strlen(s2);
> #else
> for (l2 = 0; s2[l2] != 0; l2++)
> /* nothing */;
> #endif
> if (!l2)
> return (char *)s1;
>
> /*
> * Sunday matching
> */
> while (1) {
> size_t i;
> char k;
>
> /* compare */
> for (i = 0; i < l2; i++) {
> if (s1[i] != s2[i])
> break;
> }
> if (i >= l2)
> return (char *)s1;
>
> /* if s1 terminate early? */
> if (pchk < s1 + i)
> pchk = s1 + i;
> do {
> k = *pchk;
> if (k == 0)
> return NULL;
> } while (pchk++ != s1 + l2);
>
> /* find the k's last presence in s2 (k = s1[l2]) */
> for (i = l2; --i != (size_t)-1; ) {
> if (k == s2[i])
> break;
> }
>
> /* shift */
> s1 += l2 - i;
> }
> }
>
> #ifdef __i386__
> # define RDTSC_CLOBBER "%eax", "%ebx", "%ecx", "%edx"
> #elif __x86_64__
> # define RDTSC_CLOBBER "%rax", "%rbx", "%rcx", "%rdx"
> #else
> # error unknown platform
> #endif
>
> #define RDTSC_START(cycles) \
> do { \
> register unsigned cyc_high, cyc_low; \
> asm volatile("CPUID\n\t" \
> "RDTSC\n\t" \
> "mov %%edx, %0\n\t" \
> "mov %%eax, %1\n\t" \
> : "=r" (cyc_high), "=r" (cyc_low) \
> :: RDTSC_CLOBBER); \
> (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
> } while (0)
>
> #define RDTSC_STOP(cycles) \
> do { \
> register unsigned cyc_high, cyc_low; \
> asm volatile("RDTSCP\n\t" \
> "mov %%edx, %0\n\t" \
> "mov %%eax, %1\n\t" \
> "CPUID\n\t" \
> : "=r" (cyc_high), "=r" (cyc_low) \
> :: RDTSC_CLOBBER); \
> (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
> } while (0)
>
> typedef unsigned long long TIME_T;
> #define start_time(start) RDTSC_START(start)
> #define stop_time(stop) RDTSC_STOP(stop)
> #define diff_time(start, stop) (stop - start)
>
> #define MAX_HAYSTACK_LENGTH 64
> #define MAX_NEEDLE_LENGTH 16
> #define TEST_LOOPS 1000
>
> char haystack[MAX_HAYSTACK_LENGTH + 1];
> char needle[MAX_NEEDLE_LENGTH + 1];
>
> int main(int argc, char **argv)
> {
> size_t i, j, n;
> void *p1, *p2;
>
> srand(time(0));
>
> for (i = 0; i < MAX_HAYSTACK_LENGTH; ) {
> haystack[i] = rand();
> if (haystack[i] != 0)
> i++;
> }
> haystack[i] = 0;
>
> for (i = 0; i < MAX_HAYSTACK_LENGTH; i++) {
> TIME_T start, stop;
> unsigned long long elapsed1, elapsed2;
>
> j = rand() % MAX_NEEDLE_LENGTH;
> if (i <= MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j))
> n = i;
> else
> n = MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j);
> memcpy(needle + j, haystack + n, MAX_NEEDLE_LENGTH - j);
> needle[MAX_NEEDLE_LENGTH] = 0;
>
> elapsed1 = ~0UL;
> for (n = 0; n < TEST_LOOPS; n++) {
> start_time(start);
> asm volatile("":::"memory");
> p1 = strstr1(haystack, needle + j);
> asm volatile("":::"memory");
> stop_time(stop);
>
> if (elapsed1 > diff_time(start, stop))
> elapsed1 = diff_time(start, stop);
> }
>
> elapsed2 = ~0UL;
> for (n = 0; n < TEST_LOOPS; n++) {
> start_time(start);
> asm volatile("":::"memory");
> p2 = strstr2(haystack, needle + j);
> asm volatile("":::"memory");
> stop_time(stop);
>
> if (elapsed2 > diff_time(start, stop))
> elapsed2 = diff_time(start, stop);
> }
>
> if (p1 != p2)
> fprintf(stderr, "Error: %p != %p\n", p1, p2);
> else
> fprintf(stdout, "%3d, %3d:\t%lld,\t%lld\n", i, j, elapsed1, elapsed2);
> }
>
> return 0;
> }
>
> The test result:
> [zzx@fedora strstr]$ ./test
> 0, 3: 68, 68
> 1, 13: 74, 66
> 2, 2: 88, 94
> 3, 5: 96, 88
> 4, 8: 108, 80
> 5, 13: 110, 76
> 6, 12: 132, 82
> 7, 9: 142, 82
> 8, 11: 152, 88
> 9, 13: 146, 90
> 10, 14: 154, 94
> 11, 15: 132, 104
> 12, 1: 196, 114
> 13, 8: 206, 108
> 14, 14: 186, 110
> 15, 13: 194, 112
> 16, 0: 260, 124
> 17, 10: 258, 124
> 18, 15: 208, 136
> 19, 11: 276, 136
> 20, 13: 256, 128
> 21, 12: 292, 144
> 22, 11: 300, 142
> 23, 13: 278, 144
> 24, 7: 334, 152
> 25, 1: 346, 166
> 26, 6: 368, 168
> 27, 15: 278, 182
> 28, 1: 374, 168
> 29, 3: 382, 188
> 30, 8: 398, 172
> 31, 4: 402, 188
> 32, 0: 430, 180
> 33, 10: 398, 184
> 34, 10: 410, 192
> 35, 8: 434, 180
> 36, 7: 444, 198
> 37, 6: 454, 204
> 38, 1: 464, 220
> 39, 2: 472, 206
> 40, 4: 482, 210
> 41, 15: 388, 262
> 42, 2: 502, 210
> 43, 5: 510, 220
> 44, 8: 520, 214
> 45, 0: 564, 236
> 46, 3: 550, 242
> 47, 8: 552, 262
> 48, 10: 528, 270
> 49, 2: 572, 256
> 50, 3: 586, 246
> 51, 7: 590, 278
> 52, 14: 506, 308
> 53, 14: 514, 298
> 54, 4: 602, 240
> 55, 5: 626, 284
> 56, 15: 506, 316
> 57, 10: 606, 326
> 58, 4: 606, 240
> 59, 0: 596, 240
> 60, 13: 570, 316
> 61, 13: 576, 328
> 62, 5: 618, 284
> 63, 13: 576, 328
>
> Thanks!
>

The original strnstr might has a bug too!
For example, assume s1 is "123\0abc...." and s2 is "abc\0",
call strnstr(s1, s2, 7) will return &s1[4], but the correct result is NULL.