diff options
Diffstat (limited to 'openbsd-compat/fnmatch.c')
-rw-r--r-- | openbsd-compat/fnmatch.c | 489 |
1 files changed, 489 insertions, 0 deletions
diff --git a/openbsd-compat/fnmatch.c b/openbsd-compat/fnmatch.c new file mode 100644 index 000000000..88d5dbc3c --- /dev/null +++ b/openbsd-compat/fnmatch.c | |||
@@ -0,0 +1,489 @@ | |||
1 | /* $OpenBSD: fnmatch.c,v 1.21 2016/03/01 20:29:03 millert Exp $ */ | ||
2 | |||
3 | /* Copyright (c) 2011, VMware, Inc. | ||
4 | * All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions are met: | ||
8 | * * Redistributions of source code must retain the above copyright | ||
9 | * notice, this list of conditions and the following disclaimer. | ||
10 | * * Redistributions in binary form must reproduce the above copyright | ||
11 | * notice, this list of conditions and the following disclaimer in the | ||
12 | * documentation and/or other materials provided with the distribution. | ||
13 | * * Neither the name of the VMware, Inc. nor the names of its contributors | ||
14 | * may be used to endorse or promote products derived from this software | ||
15 | * without specific prior written permission. | ||
16 | * | ||
17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
20 | * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR | ||
21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | ||
26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
27 | */ | ||
28 | |||
29 | /* | ||
30 | * Copyright (c) 2008, 2016 Todd C. Miller <millert@openbsd.org> | ||
31 | * | ||
32 | * Permission to use, copy, modify, and distribute this software for any | ||
33 | * purpose with or without fee is hereby granted, provided that the above | ||
34 | * copyright notice and this permission notice appear in all copies. | ||
35 | * | ||
36 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
37 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
38 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
39 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
40 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
41 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
42 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
43 | */ | ||
44 | |||
45 | /* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011 | ||
46 | * | ||
47 | * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008 | ||
48 | * as described in; | ||
49 | * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html | ||
50 | * | ||
51 | * Filename pattern matches defined in section 2.13, "Pattern Matching Notation" | ||
52 | * from chapter 2. "Shell Command Language" | ||
53 | * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 | ||
54 | * where; 1. A bracket expression starting with an unquoted <circumflex> '^' | ||
55 | * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.' | ||
56 | * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading | ||
57 | * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce | ||
58 | * a valid bracket expression is treated as an ordinary character; 4. a differing | ||
59 | * number of consecutive slashes within pattern and string will NOT match; | ||
60 | * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character. | ||
61 | * | ||
62 | * Bracket expansion defined in section 9.3.5, "RE Bracket Expression", | ||
63 | * from chapter 9, "Regular Expressions" | ||
64 | * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05 | ||
65 | * with no support for collating symbols, equivalence class expressions or | ||
66 | * character class expressions. A partial range expression with a leading | ||
67 | * hyphen following a valid range expression will match only the ordinary | ||
68 | * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters | ||
69 | * 'a' through 'm', a <hyphen> '-', or a 'z'). | ||
70 | * | ||
71 | * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one | ||
72 | * path segment of string, and FNM_CASEFOLD to ignore alpha case. | ||
73 | * | ||
74 | * NOTE: Only POSIX/C single byte locales are correctly supported at this time. | ||
75 | * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results, | ||
76 | * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and | ||
77 | * nonalpha characters within a range. | ||
78 | * | ||
79 | * XXX comments below indicate porting required for multi-byte character sets | ||
80 | * and non-POSIX locale collation orders; requires mbr* APIs to track shift | ||
81 | * state of pattern and string (rewinding pattern and string repeatedly). | ||
82 | * | ||
83 | * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g. | ||
84 | * UTF-8, SHIFT-JIS, etc). Any implementation allowing '\' as an alternate | ||
85 | * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS. | ||
86 | */ | ||
87 | |||
88 | #include <fnmatch.h> | ||
89 | #include <string.h> | ||
90 | #include <ctype.h> | ||
91 | |||
92 | #include "charclass.h" | ||
93 | |||
94 | #define RANGE_MATCH 1 | ||
95 | #define RANGE_NOMATCH 0 | ||
96 | #define RANGE_ERROR (-1) | ||
97 | |||
98 | static int | ||
99 | classmatch(const char *pattern, char test, int foldcase, const char **ep) | ||
100 | { | ||
101 | const char * const mismatch = pattern; | ||
102 | const char *colon; | ||
103 | struct cclass *cc; | ||
104 | int rval = RANGE_NOMATCH; | ||
105 | size_t len; | ||
106 | |||
107 | if (pattern[0] != '[' || pattern[1] != ':') { | ||
108 | *ep = mismatch; | ||
109 | return RANGE_ERROR; | ||
110 | } | ||
111 | pattern += 2; | ||
112 | |||
113 | if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') { | ||
114 | *ep = mismatch; | ||
115 | return RANGE_ERROR; | ||
116 | } | ||
117 | *ep = colon + 2; | ||
118 | len = (size_t)(colon - pattern); | ||
119 | |||
120 | if (foldcase && strncmp(pattern, "upper:]", 7) == 0) | ||
121 | pattern = "lower:]"; | ||
122 | for (cc = cclasses; cc->name != NULL; cc++) { | ||
123 | if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') { | ||
124 | if (cc->isctype((unsigned char)test)) | ||
125 | rval = RANGE_MATCH; | ||
126 | break; | ||
127 | } | ||
128 | } | ||
129 | if (cc->name == NULL) { | ||
130 | /* invalid character class, treat as normal text */ | ||
131 | *ep = mismatch; | ||
132 | rval = RANGE_ERROR; | ||
133 | } | ||
134 | return rval; | ||
135 | } | ||
136 | |||
137 | /* Most MBCS/collation/case issues handled here. Wildcard '*' is not handled. | ||
138 | * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over, | ||
139 | * however the "\/" sequence is advanced to '/'. | ||
140 | * | ||
141 | * Both pattern and string are **char to support pointer increment of arbitrary | ||
142 | * multibyte characters for the given locale, in a later iteration of this code | ||
143 | */ | ||
144 | static int fnmatch_ch(const char **pattern, const char **string, int flags) | ||
145 | { | ||
146 | const char * const mismatch = *pattern; | ||
147 | const int nocase = !!(flags & FNM_CASEFOLD); | ||
148 | const int escape = !(flags & FNM_NOESCAPE); | ||
149 | const int slash = !!(flags & FNM_PATHNAME); | ||
150 | int result = FNM_NOMATCH; | ||
151 | const char *startch; | ||
152 | int negate; | ||
153 | |||
154 | if (**pattern == '[') { | ||
155 | ++*pattern; | ||
156 | |||
157 | /* Handle negation, either leading ! or ^ operators */ | ||
158 | negate = (**pattern == '!') || (**pattern == '^'); | ||
159 | if (negate) | ||
160 | ++*pattern; | ||
161 | |||
162 | /* ']' is an ordinary char at the start of the range pattern */ | ||
163 | if (**pattern == ']') | ||
164 | goto leadingclosebrace; | ||
165 | |||
166 | while (**pattern) { | ||
167 | if (**pattern == ']') { | ||
168 | ++*pattern; | ||
169 | /* XXX: Fix for MBCS character width */ | ||
170 | ++*string; | ||
171 | return (result ^ negate); | ||
172 | } | ||
173 | |||
174 | if (escape && (**pattern == '\\')) { | ||
175 | ++*pattern; | ||
176 | |||
177 | /* Patterns must terminate with ']', not EOS */ | ||
178 | if (!**pattern) | ||
179 | break; | ||
180 | } | ||
181 | |||
182 | /* Patterns must terminate with ']' not '/' */ | ||
183 | if (slash && (**pattern == '/')) | ||
184 | break; | ||
185 | |||
186 | /* Match character classes. */ | ||
187 | switch (classmatch(*pattern, **string, nocase, pattern)) { | ||
188 | case RANGE_MATCH: | ||
189 | result = 0; | ||
190 | continue; | ||
191 | case RANGE_NOMATCH: | ||
192 | /* Valid character class but no match. */ | ||
193 | continue; | ||
194 | default: | ||
195 | /* Not a valid character class. */ | ||
196 | break; | ||
197 | } | ||
198 | if (!**pattern) | ||
199 | break; | ||
200 | |||
201 | leadingclosebrace: | ||
202 | /* Look at only well-formed range patterns; | ||
203 | * "x-]" is not allowed unless escaped ("x-\]") | ||
204 | * XXX: Fix for locale/MBCS character width | ||
205 | */ | ||
206 | if (((*pattern)[1] == '-') && ((*pattern)[2] != ']')) { | ||
207 | startch = *pattern; | ||
208 | *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2; | ||
209 | |||
210 | /* | ||
211 | * NOT a properly balanced [expr] pattern, EOS | ||
212 | * terminated or ranges containing a slash in | ||
213 | * FNM_PATHNAME mode pattern fall out to to the | ||
214 | * rewind and test '[' literal code path. | ||
215 | */ | ||
216 | if (!**pattern || (slash && (**pattern == '/'))) | ||
217 | break; | ||
218 | |||
219 | /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ | ||
220 | if ((**string >= *startch) && (**string <= **pattern)) | ||
221 | result = 0; | ||
222 | else if (nocase && | ||
223 | (isupper((unsigned char)**string) || | ||
224 | isupper((unsigned char)*startch) || | ||
225 | isupper((unsigned char)**pattern)) && | ||
226 | (tolower((unsigned char)**string) >= | ||
227 | tolower((unsigned char)*startch)) && | ||
228 | (tolower((unsigned char)**string) <= | ||
229 | tolower((unsigned char)**pattern))) | ||
230 | result = 0; | ||
231 | |||
232 | ++*pattern; | ||
233 | continue; | ||
234 | } | ||
235 | |||
236 | /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ | ||
237 | if ((**string == **pattern)) | ||
238 | result = 0; | ||
239 | else if (nocase && (isupper((unsigned char)**string) || | ||
240 | isupper((unsigned char)**pattern)) && | ||
241 | (tolower((unsigned char)**string) == | ||
242 | tolower((unsigned char)**pattern))) | ||
243 | result = 0; | ||
244 | |||
245 | ++*pattern; | ||
246 | } | ||
247 | /* | ||
248 | * NOT a properly balanced [expr] pattern; | ||
249 | * Rewind and reset result to test '[' literal | ||
250 | */ | ||
251 | *pattern = mismatch; | ||
252 | result = FNM_NOMATCH; | ||
253 | } else if (**pattern == '?') { | ||
254 | /* Optimize '?' match before unescaping **pattern */ | ||
255 | if (!**string || (slash && (**string == '/'))) | ||
256 | return FNM_NOMATCH; | ||
257 | result = 0; | ||
258 | goto fnmatch_ch_success; | ||
259 | } else if (escape && (**pattern == '\\') && (*pattern)[1]) { | ||
260 | ++*pattern; | ||
261 | } | ||
262 | |||
263 | /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */ | ||
264 | if (**string == **pattern) | ||
265 | result = 0; | ||
266 | else if (nocase && (isupper((unsigned char)**string) || | ||
267 | isupper((unsigned char)**pattern)) && | ||
268 | (tolower((unsigned char)**string) == | ||
269 | tolower((unsigned char)**pattern))) | ||
270 | result = 0; | ||
271 | |||
272 | /* Refuse to advance over trailing slash or NULs */ | ||
273 | if (**string == '\0' || **pattern == '\0' || | ||
274 | (slash && ((**string == '/') || (**pattern == '/')))) | ||
275 | return result; | ||
276 | |||
277 | fnmatch_ch_success: | ||
278 | ++*pattern; | ||
279 | ++*string; | ||
280 | return result; | ||
281 | } | ||
282 | |||
283 | |||
284 | int fnmatch(const char *pattern, const char *string, int flags) | ||
285 | { | ||
286 | static const char dummystring[2] = {' ', 0}; | ||
287 | const int escape = !(flags & FNM_NOESCAPE); | ||
288 | const int slash = !!(flags & FNM_PATHNAME); | ||
289 | const int leading_dir = !!(flags & FNM_LEADING_DIR); | ||
290 | const char *dummyptr, *matchptr, *strendseg; | ||
291 | int wild; | ||
292 | /* For '*' wild processing only; surpress 'used before initialization' | ||
293 | * warnings with dummy initialization values; | ||
294 | */ | ||
295 | const char *strstartseg = NULL; | ||
296 | const char *mismatch = NULL; | ||
297 | int matchlen = 0; | ||
298 | |||
299 | if (*pattern == '*') | ||
300 | goto firstsegment; | ||
301 | |||
302 | while (*pattern && *string) { | ||
303 | /* | ||
304 | * Pre-decode "\/" which has no special significance, and | ||
305 | * match balanced slashes, starting a new segment pattern. | ||
306 | */ | ||
307 | if (slash && escape && (*pattern == '\\') && (pattern[1] == '/')) | ||
308 | ++pattern; | ||
309 | if (slash && (*pattern == '/') && (*string == '/')) { | ||
310 | ++pattern; | ||
311 | ++string; | ||
312 | } | ||
313 | |||
314 | firstsegment: | ||
315 | /* | ||
316 | * At the beginning of each segment, validate leading period | ||
317 | * behavior. | ||
318 | */ | ||
319 | if ((flags & FNM_PERIOD) && (*string == '.')) { | ||
320 | if (*pattern == '.') | ||
321 | ++pattern; | ||
322 | else if (escape && (*pattern == '\\') && (pattern[1] == '.')) | ||
323 | pattern += 2; | ||
324 | else | ||
325 | return FNM_NOMATCH; | ||
326 | ++string; | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * Determine the end of string segment. Presumes '/' | ||
331 | * character is unique, not composite in any MBCS encoding | ||
332 | */ | ||
333 | if (slash) { | ||
334 | strendseg = strchr(string, '/'); | ||
335 | if (!strendseg) | ||
336 | strendseg = strchr(string, '\0'); | ||
337 | } else { | ||
338 | strendseg = strchr(string, '\0'); | ||
339 | } | ||
340 | |||
341 | /* | ||
342 | * Allow pattern '*' to be consumed even with no remaining | ||
343 | * string to match. | ||
344 | */ | ||
345 | while (*pattern) { | ||
346 | if ((string > strendseg) || | ||
347 | ((string == strendseg) && (*pattern != '*'))) | ||
348 | break; | ||
349 | |||
350 | if (slash && ((*pattern == '/') || | ||
351 | (escape && (*pattern == '\\') && (pattern[1] == '/')))) | ||
352 | break; | ||
353 | |||
354 | /* | ||
355 | * Reduce groups of '*' and '?' to n '?' matches | ||
356 | * followed by one '*' test for simplicity. | ||
357 | */ | ||
358 | for (wild = 0; (*pattern == '*') || (*pattern == '?'); ++pattern) { | ||
359 | if (*pattern == '*') { | ||
360 | wild = 1; | ||
361 | } else if (string < strendseg) { /* && (*pattern == '?') */ | ||
362 | /* XXX: Advance 1 char for MBCS locale */ | ||
363 | ++string; | ||
364 | } | ||
365 | else { /* (string >= strendseg) && (*pattern == '?') */ | ||
366 | return FNM_NOMATCH; | ||
367 | } | ||
368 | } | ||
369 | |||
370 | if (wild) { | ||
371 | strstartseg = string; | ||
372 | mismatch = pattern; | ||
373 | |||
374 | /* | ||
375 | * Count fixed (non '*') char matches remaining | ||
376 | * in pattern * excluding '/' (or "\/") and '*'. | ||
377 | */ | ||
378 | for (matchptr = pattern, matchlen = 0; 1; ++matchlen) { | ||
379 | if ((*matchptr == '\0') || | ||
380 | (slash && ((*matchptr == '/') || | ||
381 | (escape && (*matchptr == '\\') && | ||
382 | (matchptr[1] == '/'))))) { | ||
383 | /* Compare precisely this many | ||
384 | * trailing string chars, the | ||
385 | * resulting match needs no | ||
386 | * wildcard loop. | ||
387 | */ | ||
388 | /* XXX: Adjust for MBCS */ | ||
389 | if (string + matchlen > strendseg) | ||
390 | return FNM_NOMATCH; | ||
391 | |||
392 | string = strendseg - matchlen; | ||
393 | wild = 0; | ||
394 | break; | ||
395 | } | ||
396 | |||
397 | if (*matchptr == '*') { | ||
398 | /* | ||
399 | * Ensure at least this many | ||
400 | * trailing string chars remain | ||
401 | * for the first comparison. | ||
402 | */ | ||
403 | /* XXX: Adjust for MBCS */ | ||
404 | if (string + matchlen > strendseg) | ||
405 | return FNM_NOMATCH; | ||
406 | |||
407 | /* | ||
408 | * Begin first wild comparison | ||
409 | * at the current position. | ||
410 | */ | ||
411 | break; | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * Skip forward in pattern by a single | ||
416 | * character match Use a dummy | ||
417 | * fnmatch_ch() test to count one | ||
418 | * "[range]" escape. | ||
419 | */ | ||
420 | /* XXX: Adjust for MBCS */ | ||
421 | if (escape && (*matchptr == '\\') && | ||
422 | matchptr[1]) { | ||
423 | matchptr += 2; | ||
424 | } else if (*matchptr == '[') { | ||
425 | dummyptr = dummystring; | ||
426 | fnmatch_ch(&matchptr, &dummyptr, | ||
427 | flags); | ||
428 | } else { | ||
429 | ++matchptr; | ||
430 | } | ||
431 | } | ||
432 | } | ||
433 | |||
434 | /* Incrementally match string against the pattern. */ | ||
435 | while (*pattern && (string < strendseg)) { | ||
436 | /* Success; begin a new wild pattern search. */ | ||
437 | if (*pattern == '*') | ||
438 | break; | ||
439 | |||
440 | if (slash && ((*string == '/') || | ||
441 | (*pattern == '/') || (escape && | ||
442 | (*pattern == '\\') && (pattern[1] == '/')))) | ||
443 | break; | ||
444 | |||
445 | /* | ||
446 | * Compare ch's (the pattern is advanced over | ||
447 | * "\/" to the '/', but slashes will mismatch, | ||
448 | * and are not consumed). | ||
449 | */ | ||
450 | if (!fnmatch_ch(&pattern, &string, flags)) | ||
451 | continue; | ||
452 | |||
453 | /* | ||
454 | * Failed to match, loop against next char | ||
455 | * offset of string segment until not enough | ||
456 | * string chars remain to match the fixed | ||
457 | * pattern. | ||
458 | */ | ||
459 | if (wild) { | ||
460 | /* XXX: Advance 1 char for MBCS locale */ | ||
461 | string = ++strstartseg; | ||
462 | if (string + matchlen > strendseg) | ||
463 | return FNM_NOMATCH; | ||
464 | |||
465 | pattern = mismatch; | ||
466 | continue; | ||
467 | } else | ||
468 | return FNM_NOMATCH; | ||
469 | } | ||
470 | } | ||
471 | |||
472 | if (*string && !((slash || leading_dir) && (*string == '/'))) | ||
473 | return FNM_NOMATCH; | ||
474 | |||
475 | if (*pattern && !(slash && ((*pattern == '/') || | ||
476 | (escape && (*pattern == '\\') && (pattern[1] == '/'))))) | ||
477 | return FNM_NOMATCH; | ||
478 | |||
479 | if (leading_dir && !*pattern && *string == '/') | ||
480 | return 0; | ||
481 | } | ||
482 | |||
483 | /* Where both pattern and string are at EOS, declare success. */ | ||
484 | if (!*string && !*pattern) | ||
485 | return 0; | ||
486 | |||
487 | /* Pattern didn't match to the end of string. */ | ||
488 | return FNM_NOMATCH; | ||
489 | } | ||