summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDamien Miller <djm@mindrot.org>2020-08-10 13:24:09 +1000
committerDamien Miller <djm@mindrot.org>2020-08-10 13:24:20 +1000
commiteaf2765efe8bc74feba85c34295d067637fc6635 (patch)
tree6b806f20dc0fa2b898d02a98335fd435d8cebf2d
parented6bef77f5bb5b8f9ca2914478949e29f2f0a780 (diff)
sync memmem.c with OpenBSD
-rw-r--r--openbsd-compat/memmem.c214
1 files changed, 167 insertions, 47 deletions
diff --git a/openbsd-compat/memmem.c b/openbsd-compat/memmem.c
index 3e5e6b5e6..eb64eaab8 100644
--- a/openbsd-compat/memmem.c
+++ b/openbsd-compat/memmem.c
@@ -1,30 +1,26 @@
1/* $OpenBSD: memmem.c,v 1.4 2015/08/31 02:53:57 guenther Exp $ */ 1/* $OpenBSD: memmem.c,v 1.5 2020/04/16 12:39:28 claudio Exp $ */
2/*- 2
3 * Copyright (c) 2005 Pascal Gloor <pascal.gloor@spale.com> 3/*
4 * Copyright (c) 2005-2020 Rich Felker, et al.
4 * 5 *
5 * Redistribution and use in source and binary forms, with or without 6 * Permission is hereby granted, free of charge, to any person obtaining
6 * modification, are permitted provided that the following conditions 7 * a copy of this software and associated documentation files (the
7 * are met: 8 * "Software"), to deal in the Software without restriction, including
8 * 1. Redistributions of source code must retain the above copyright 9 * without limitation the rights to use, copy, modify, merge, publish,
9 * notice, this list of conditions and the following disclaimer. 10 * distribute, sublicense, and/or sell copies of the Software, and to
10 * 2. Redistributions in binary form must reproduce the above copyright 11 * permit persons to whom the Software is furnished to do so, subject to
11 * notice, this list of conditions and the following disclaimer in the 12 * the following conditions:
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote
14 * products derived from this software without specific prior written
15 * permission.
16 * 13 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * The above copyright notice and this permission notice shall be
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * included in all copies or substantial portions of the Software.
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 *
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * SUCH DAMAGE.
28 */ 24 */
29 25
30#include "includes.h" 26#include "includes.h"
@@ -32,38 +28,162 @@
32#ifndef HAVE_MEMMEM 28#ifndef HAVE_MEMMEM
33 29
34#include <string.h> 30#include <string.h>
31#include <stdint.h>
32
33static char *
34twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
35{
36 uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
37 for (h+=2, k-=2; k; k--, hw = hw<<8 | *h++)
38 if (hw == nw) return (char *)h-2;
39 return hw == nw ? (char *)h-2 : 0;
40}
41
42static char *
43threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
44{
45 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
46 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
47 for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8)
48 if (hw == nw) return (char *)h-3;
49 return hw == nw ? (char *)h-3 : 0;
50}
51
52static char *
53fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
54{
55 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
56 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
57 for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++)
58 if (hw == nw) return (char *)h-4;
59 return hw == nw ? (char *)h-4 : 0;
60}
61
62#define MAX(a,b) ((a)>(b)?(a):(b))
63#define MIN(a,b) ((a)<(b)?(a):(b))
64
65#define BITOP(a,b,op) \
66 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
35 67
36/* 68/*
37 * Find the first occurrence of the byte string s in byte string l. 69 * Maxime Crochemore and Dominique Perrin, Two-way string-matching,
70 * Journal of the ACM, 38(3):651-675, July 1991.
38 */ 71 */
39 72static char *
40void * 73twoway_memmem(const unsigned char *h, const unsigned char *z,
41memmem(const void *l, size_t l_len, const void *s, size_t s_len) 74 const unsigned char *n, size_t l)
42{ 75{
43 const char *cur, *last; 76 size_t i, ip, jp, k, p, ms, p0, mem, mem0;
44 const char *cl = l; 77 size_t byteset[32 / sizeof(size_t)] = { 0 };
45 const char *cs = s; 78 size_t shift[256];
79
80 /* Computing length of needle and fill shift table */
81 for (i=0; i<l; i++)
82 BITOP(byteset, n[i], |=), shift[n[i]] = i+1;
83
84 /* Compute maximal suffix */
85 ip = -1; jp = 0; k = p = 1;
86 while (jp+k<l) {
87 if (n[ip+k] == n[jp+k]) {
88 if (k == p) {
89 jp += p;
90 k = 1;
91 } else k++;
92 } else if (n[ip+k] > n[jp+k]) {
93 jp += k;
94 k = 1;
95 p = jp - ip;
96 } else {
97 ip = jp++;
98 k = p = 1;
99 }
100 }
101 ms = ip;
102 p0 = p;
103
104 /* And with the opposite comparison */
105 ip = -1; jp = 0; k = p = 1;
106 while (jp+k<l) {
107 if (n[ip+k] == n[jp+k]) {
108 if (k == p) {
109 jp += p;
110 k = 1;
111 } else k++;
112 } else if (n[ip+k] < n[jp+k]) {
113 jp += k;
114 k = 1;
115 p = jp - ip;
116 } else {
117 ip = jp++;
118 k = p = 1;
119 }
120 }
121 if (ip+1 > ms+1) ms = ip;
122 else p = p0;
46 123
47 /* a zero length needle should just return the haystack */ 124 /* Periodic needle? */
48 if (s_len == 0) 125 if (memcmp(n, n+p, ms+1)) {
49 return (void *)cl; 126 mem0 = 0;
127 p = MAX(ms, l-ms-1) + 1;
128 } else mem0 = l-p;
129 mem = 0;
50 130
51 /* "s" must be smaller or equal to "l" */ 131 /* Search loop */
52 if (l_len < s_len) 132 for (;;) {
53 return NULL; 133 /* If remainder of haystack is shorter than needle, done */
134 if (z-h < l) return 0;
135
136 /* Check last byte first; advance by shift on mismatch */
137 if (BITOP(byteset, h[l-1], &)) {
138 k = l-shift[h[l-1]];
139 if (k) {
140 if (k < mem) k = mem;
141 h += k;
142 mem = 0;
143 continue;
144 }
145 } else {
146 h += l;
147 mem = 0;
148 continue;
149 }
150
151 /* Compare right half */
152 for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
153 if (k < l) {
154 h += k-ms;
155 mem = 0;
156 continue;
157 }
158 /* Compare left half */
159 for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
160 if (k <= mem) return (char *)h;
161 h += p;
162 mem = mem0;
163 }
164}
165
166void *
167memmem(const void *h0, size_t k, const void *n0, size_t l)
168{
169 const unsigned char *h = h0, *n = n0;
54 170
55 /* special case where s_len == 1 */ 171 /* Return immediately on empty needle */
56 if (s_len == 1) 172 if (!l) return (void *)h;
57 return memchr(l, *cs, l_len);
58 173
59 /* the last position where its possible to find "s" in "l" */ 174 /* Return immediately when needle is longer than haystack */
60 last = cl + l_len - s_len; 175 if (k<l) return 0;
61 176
62 for (cur = cl; cur <= last; cur++) 177 /* Use faster algorithms for short needles */
63 if (cur[0] == cs[0] && memcmp(cur, cs, s_len) == 0) 178 h = memchr(h0, *n, k);
64 return (void *)cur; 179 if (!h || l==1) return (void *)h;
180 k -= h - (const unsigned char *)h0;
181 if (k<l) return 0;
182 if (l==2) return twobyte_memmem(h, k, n);
183 if (l==3) return threebyte_memmem(h, k, n);
184 if (l==4) return fourbyte_memmem(h, k, n);
65 185
66 return NULL; 186 return twoway_memmem(h, h+k, n, l);
67} 187}
68DEF_WEAK(memmem); 188DEF_WEAK(memmem);
69#endif /* HAVE_MEMMEM */ 189#endif /* HAVE_MEMMEM */