*
* str_istr - locate a substring ignoring case
*
+ * mem_mem_bmh,
* mem_mem,
* mem_str,
* mem_istr - locate a pattern in memory
*
* str_delim - locate the first occurrence of a delimiter in a string
-
+ *
* str_skip - skip initial sequence of specified characters in a string
-
+ *
* str_split - break down a string into separate parts
*
* str_ltrim,
/*
- * mem_mem, mem_str, mem_istr - locate a pattern in memory
+ * mem_mem_bmh, mem_mem, mem_str, mem_istr - locate a pattern in memory
*
- * The mem_mem() function finds the first occurrence of the pattern
- * needle of length n in an haystack of size h.
+ * The mem_mem_bmh() and mem_mem() functions find the first occurrence
+ * of the pattern needle of length n in a haystack of size h. The
+ * mem_mem_bmh() function uses the Boyer-Moore-Horspool search algorithm,
+ * see: https://en.wikipedia.org/wiki/Boyer–Moore–Horspool_algorithm
*
* The mem_str() function is similar, except that it expects a string
* for needle, ignoring the terminating null character. The mem_istr()
* function additionally ignores the case of characters in both needle
* and haystack.
*
- * The mem_mem(), mem_str() and mem_istr() functions return a pointer
- * to the beginning of the located pattern, or NULL if the pattern is
- * not found. If the search pattern is of zero length, the functions
- * return haystack.
+ * The mem_mem_bmh(), mem_mem(), mem_str() and mem_istr() functions
+ * return a pointer to the beginning of the located pattern, or NULL if
+ * the pattern was not found. If the search pattern is of zero length,
+ * the functions return haystack.
*/
+static inline void *mem_mem_bmh(const void *haystack, size_t h,
+		const void *needle, size_t n) {
+	/*
+	 * Boyer-Moore-Horspool search over raw bytes. All indices are kept
+	 * in size_t so buffers larger than INT_MAX are handled correctly.
+	 */
+	size_t k, skip[256];
+	const uint8_t *hst = (const uint8_t *)haystack;
+	const uint8_t *ndl = (const uint8_t *)needle;
+
+	/* An empty pattern matches at the start of the haystack. */
+	if (n == 0)
+		return (void *)haystack;
+	/* A pattern longer than the haystack can never match. */
+	if (h < n)
+		return NULL;
+	/*
+	 * Build the bad-character skip table: a byte that does not occur
+	 * in the first n - 1 needle bytes allows a shift of the full
+	 * needle length; otherwise the shift is the distance from the
+	 * byte's last such occurrence to the end of the needle.
+	 */
+	for (k = 0; k < 256; ++k)
+		skip[k] = n;
+	for (k = 0; k < n - 1; ++k)
+		skip[ndl[k]] = n - k - 1;
+	/* k is the haystack index aligned with the needle's last byte. */
+	k = n - 1;
+	while (k < h) {
+		size_t matched = 0; /* trailing bytes that agree so far */
+		size_t step;
+
+		/* Compare backwards from the end of the needle. */
+		while (matched < n && hst[k - matched] == ndl[n - 1 - matched])
+			matched++;
+		if (matched == n)
+			return (void *)(hst + (k - (n - 1)));
+		/* Stop before k + step could pass the end (or wrap around). */
+		step = skip[hst[k]];
+		if (step > h - 1 - k)
+			break;
+		k += step;
+	}
+	return NULL;
+}
+
static inline void *mem_mem(const void *haystack, size_t h,
const void *needle, size_t n) {
if (h >= n) {
+ /* Resort to the BMH algorithm under suitable conditions: */
+ if (n > 2 && n * (h - n) > 256)
+ return mem_mem_bmh(haystack, h, needle, n);
+ /* Perform naive search: */
const char *p = haystack;
h -= n;
do {
T(mem_mem(m1, sizeof m1, (char[]){-1}, 1) == m1 + 8);
T(mem_mem(m1, sizeof m1, "xc", 3) == NULL);
T(mem_mem(m1, sizeof m1, "xc", 2) == m1 + 6);
+ const char m3[] = "Lorem ipsum dolor sit amet, consectetur "
+ "adipiscing elit, sed do eiusmod tempor incididunt ut "
+ "labore et dolore magna aliqua. Ut enim ad minim veniam, "
+ "quis nostrud exercitation ullamco laboris nisi ut aliquip "
+ "ex ea commodo consequat. Duis aute irure dolor in "
+ "reprehenderit in voluptate velit esse cillum dolore eu "
+ "fugiat nulla pariatur. Excepteur sint occaecat cupidatat "
+ "non proident, sunt in culpa qui officia deserunt mollit "
+ "anim id est laborum.";
+ T(mem_mem_bmh(m3, strlen(m3), "", 0) == m3);
+ T(mem_mem_bmh(m3, strlen(m3), "FOOBAR", 6) == NULL);
+ T(mem_mem_bmh(m3, strlen(m3), "officia", 7) == m3 + 401);
}
/*****************************************/