summaryrefslogtreecommitdiff
path: root/openbsd-compat/memmem.c
diff options
context:
space:
mode:
Diffstat (limited to 'openbsd-compat/memmem.c')
-rw-r--r--openbsd-compat/memmem.c216
1 files changed, 169 insertions, 47 deletions
diff --git a/openbsd-compat/memmem.c b/openbsd-compat/memmem.c
index 3e5e6b5e6..ac1243eb0 100644
--- a/openbsd-compat/memmem.c
+++ b/openbsd-compat/memmem.c
@@ -1,30 +1,26 @@
1/* $OpenBSD: memmem.c,v 1.4 2015/08/31 02:53:57 guenther Exp $ */ 1/* $OpenBSD: memmem.c,v 1.5 2020/04/16 12:39:28 claudio Exp $ */
2/*- 2
3 * Copyright (c) 2005 Pascal Gloor <pascal.gloor@spale.com> 3/*
4 * Copyright (c) 2005-2020 Rich Felker, et al.
4 * 5 *
5 * Redistribution and use in source and binary forms, with or without 6 * Permission is hereby granted, free of charge, to any person obtaining
6 * modification, are permitted provided that the following conditions 7 * a copy of this software and associated documentation files (the
7 * are met: 8 * "Software"), to deal in the Software without restriction, including
8 * 1. Redistributions of source code must retain the above copyright 9 * without limitation the rights to use, copy, modify, merge, publish,
9 * notice, this list of conditions and the following disclaimer. 10 * distribute, sublicense, and/or sell copies of the Software, and to
10 * 2. Redistributions in binary form must reproduce the above copyright 11 * permit persons to whom the Software is furnished to do so, subject to
11 * notice, this list of conditions and the following disclaimer in the 12 * the following conditions:
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote
14 * products derived from this software without specific prior written
15 * permission.
16 * 13 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * The above copyright notice and this permission notice shall be
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * included in all copies or substantial portions of the Software.
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 *
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * SUCH DAMAGE.
28 */ 24 */
29 25
30#include "includes.h" 26#include "includes.h"
@@ -32,38 +28,164 @@
32#ifndef HAVE_MEMMEM 28#ifndef HAVE_MEMMEM
33 29
34#include <string.h> 30#include <string.h>
31#ifdef HAVE_STDINT_H
32#include <stdint.h>
33#endif
34
35static char *
36twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
37{
38 uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
39 for (h+=2, k-=2; k; k--, hw = hw<<8 | *h++)
40 if (hw == nw) return (char *)h-2;
41 return hw == nw ? (char *)h-2 : 0;
42}
43
44static char *
45threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
46{
47 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
48 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
49 for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8)
50 if (hw == nw) return (char *)h-3;
51 return hw == nw ? (char *)h-3 : 0;
52}
53
54static char *
55fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
56{
57 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
58 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
59 for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++)
60 if (hw == nw) return (char *)h-4;
61 return hw == nw ? (char *)h-4 : 0;
62}
63
64#define MAX(a,b) ((a)>(b)?(a):(b))
65#define MIN(a,b) ((a)<(b)?(a):(b))
66
67#define BITOP(a,b,op) \
68 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
35 69
36/* 70/*
37 * Find the first occurrence of the byte string s in byte string l. 71 * Maxime Crochemore and Dominique Perrin, Two-way string-matching,
72 * Journal of the ACM, 38(3):651-675, July 1991.
38 */ 73 */
39 74static char *
40void * 75twoway_memmem(const unsigned char *h, const unsigned char *z,
41memmem(const void *l, size_t l_len, const void *s, size_t s_len) 76 const unsigned char *n, size_t l)
42{ 77{
43 const char *cur, *last; 78 size_t i, ip, jp, k, p, ms, p0, mem, mem0;
44 const char *cl = l; 79 size_t byteset[32 / sizeof(size_t)] = { 0 };
45 const char *cs = s; 80 size_t shift[256];
81
82 /* Computing length of needle and fill shift table */
83 for (i=0; i<l; i++)
84 BITOP(byteset, n[i], |=), shift[n[i]] = i+1;
85
86 /* Compute maximal suffix */
87 ip = -1; jp = 0; k = p = 1;
88 while (jp+k<l) {
89 if (n[ip+k] == n[jp+k]) {
90 if (k == p) {
91 jp += p;
92 k = 1;
93 } else k++;
94 } else if (n[ip+k] > n[jp+k]) {
95 jp += k;
96 k = 1;
97 p = jp - ip;
98 } else {
99 ip = jp++;
100 k = p = 1;
101 }
102 }
103 ms = ip;
104 p0 = p;
105
106 /* And with the opposite comparison */
107 ip = -1; jp = 0; k = p = 1;
108 while (jp+k<l) {
109 if (n[ip+k] == n[jp+k]) {
110 if (k == p) {
111 jp += p;
112 k = 1;
113 } else k++;
114 } else if (n[ip+k] < n[jp+k]) {
115 jp += k;
116 k = 1;
117 p = jp - ip;
118 } else {
119 ip = jp++;
120 k = p = 1;
121 }
122 }
123 if (ip+1 > ms+1) ms = ip;
124 else p = p0;
46 125
47 /* a zero length needle should just return the haystack */ 126 /* Periodic needle? */
48 if (s_len == 0) 127 if (memcmp(n, n+p, ms+1)) {
49 return (void *)cl; 128 mem0 = 0;
129 p = MAX(ms, l-ms-1) + 1;
130 } else mem0 = l-p;
131 mem = 0;
50 132
51 /* "s" must be smaller or equal to "l" */ 133 /* Search loop */
52 if (l_len < s_len) 134 for (;;) {
53 return NULL; 135 /* If remainder of haystack is shorter than needle, done */
136 if (z-h < l) return 0;
137
138 /* Check last byte first; advance by shift on mismatch */
139 if (BITOP(byteset, h[l-1], &)) {
140 k = l-shift[h[l-1]];
141 if (k) {
142 if (k < mem) k = mem;
143 h += k;
144 mem = 0;
145 continue;
146 }
147 } else {
148 h += l;
149 mem = 0;
150 continue;
151 }
152
153 /* Compare right half */
154 for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
155 if (k < l) {
156 h += k-ms;
157 mem = 0;
158 continue;
159 }
160 /* Compare left half */
161 for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
162 if (k <= mem) return (char *)h;
163 h += p;
164 mem = mem0;
165 }
166}
167
168void *
169memmem(const void *h0, size_t k, const void *n0, size_t l)
170{
171 const unsigned char *h = h0, *n = n0;
54 172
55 /* special case where s_len == 1 */ 173 /* Return immediately on empty needle */
56 if (s_len == 1) 174 if (!l) return (void *)h;
57 return memchr(l, *cs, l_len);
58 175
59 /* the last position where its possible to find "s" in "l" */ 176 /* Return immediately when needle is longer than haystack */
60 last = cl + l_len - s_len; 177 if (k<l) return 0;
61 178
62 for (cur = cl; cur <= last; cur++) 179 /* Use faster algorithms for short needles */
63 if (cur[0] == cs[0] && memcmp(cur, cs, s_len) == 0) 180 h = memchr(h0, *n, k);
64 return (void *)cur; 181 if (!h || l==1) return (void *)h;
182 k -= h - (const unsigned char *)h0;
183 if (k<l) return 0;
184 if (l==2) return twobyte_memmem(h, k, n);
185 if (l==3) return threebyte_memmem(h, k, n);
186 if (l==4) return fourbyte_memmem(h, k, n);
65 187
66 return NULL; 188 return twoway_memmem(h, h+k, n, l);
67} 189}
68DEF_WEAK(memmem); 190DEF_WEAK(memmem);
69#endif /* HAVE_MEMMEM */ 191#endif /* HAVE_MEMMEM */