From: Urban Wallasch Date: Wed, 13 Nov 2019 14:59:57 +0000 (+0100) Subject: * Added mem_mem_bmh() using the Boyer-Moore-Horspool algorithm. X-Git-Url: https://git.packet-gain.de/?a=commitdiff_plain;h=95722ce60212d5b06723fd3a42e235568a99ef5b;p=oddbits.git * Added mem_mem_bmh() using the Boyer-Moore-Horspool algorithm. --- diff --git a/str/str.h b/str/str.h index 6270d2b..86d299f 100644 --- a/str/str.h +++ b/str/str.h @@ -27,14 +27,15 @@ * * str_istr - locate a substring ignoring case * + * mem_mem_bmh, * mem_mem, * mem_str, * mem_istr - locate a pattern in memory * * str_delim - locate the first occurrence of a delimiter in a string - + * * str_skip - skip initial sequence of specified characters in a string - + * * str_split - break down a string into separate parts * * str_ltrim, @@ -164,24 +165,54 @@ static inline char *str_istr(const char *haystack, const char *needle) { /* - * mem_mem, mem_str, mem_istr - locate a pattern in memory + * mem_mem_bmh, mem_mem, mem_str, mem_istr - locate a pattern in memory * - * The mem_mem() function finds the first occurrence of the pattern - * needle of length n in an haystack of size h. + * The mem_mem_bmh() and mem_mem() functions find the first occurrence + * of the pattern needle of length n in an haystack of size h. The + * mem_mem_bmh() function uses the Boyer-Moore-Horspool search algorithm, + * see: https://en.wikipedia.org/wiki/Boyer–Moore–Horspool_algorithm * * The mem_str() function is similar, except that it expects a string * for needle, ignoring the terminating null character. The mem_istr() * function additionally ignores the case of characters in both needle * and haystack. * - * The mem_mem(), mem_str() and mem_istr() functions return a pointer - * to the beginning of the located pattern, or NULL if the pattern is - * not found. If the search pattern is of zero length, the functions - * return haystack. 
/*
 * mem_mem_bmh - locate a pattern in memory (Boyer-Moore-Horspool)
 *
 * Finds the first occurrence of the n bytes at needle within the h bytes
 * at haystack.  Bytes are compared as unsigned 8-bit values, so embedded
 * null bytes and bytes >= 0x80 are treated like any other value.
 *
 * Returns a pointer to the beginning of the located pattern, or NULL if
 * the pattern was not found.  If n is zero, haystack is returned.
 *
 * All offsets are kept in size_t: narrowing them to int would yield
 * negative indices for buffers larger than INT_MAX bytes.
 */
static inline void *mem_mem_bmh(const void *haystack, size_t h,
                                const void *needle, size_t n) {
    const uint8_t *hst = (const uint8_t *)haystack;
    const uint8_t *ndl = (const uint8_t *)needle;
    size_t skip[256];
    size_t k, last, pos, end;

    if (n == 0)
        return (void *)haystack;
    if (n > h)
        return NULL;

    /*
     * Bad-character table: for every byte value, how far the window may
     * be shifted when that byte is aligned with the last needle position.
     * Bytes that do not occur in needle[0 .. n-2] allow a full shift of n.
     */
    last = n - 1;
    for (k = 0; k < 256; ++k)
        skip[k] = n;
    for (k = 0; k < last; ++k)
        skip[ndl[k]] = last - k;

    /* Slide the window; pos is its start, end the last valid start. */
    end = h - n;
    pos = 0;
    for (;;) {
        size_t j = n;
        size_t step;

        /* Compare right to left; j counts the bytes still unverified. */
        while (j > 0 && hst[pos + j - 1] == ndl[j - 1])
            --j;
        if (j == 0)
            return (void *)(hst + pos);

        /* Check the remaining room first so that pos can never wrap. */
        step = skip[hst[pos + last]];
        if (step > end - pos)
            break;
        pos += step;
    }
    return NULL;
}
Excepteur sint occaecat cupidatat " + "non proident, sunt in culpa qui officia deserunt mollit " + "anim id est laborum."; + T(mem_mem_bmh(m3, strlen(m3), "", 0) == m3); + T(mem_mem_bmh(m3, strlen(m3), "FOOBAR", 6) == NULL); + T(mem_mem_bmh(m3, strlen(m3), "officia", 7) == m3 + 401); } /*****************************************/