1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005
2 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
19 /* Match STRING against the filename pattern PATTERN, returning zero if
20 it matches, nonzero if not. */
21 static int FCT (const CHAR *pattern, const CHAR *string,
22 const CHAR *string_end, int no_leading_period, int flags)
/* Matcher for one extended-match group.  FCT calls it when FNM_EXTMATCH
   is set and an operator character is followed by an opening
   parenthesis; OPT is that operator character.  */
24 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
25 const CHAR *string_end, int no_leading_period, int flags)
/* Scanner that FCT uses to skip over an extended-match group inside a
   run of wildcards.  */
27 static const CHAR *END (const CHAR *patternp) internal_function;
/* Core matcher: walk PATTERN one character at a time and compare it with
   the text between STRING and STRING_END.  Returns zero if the text
   matches and nonzero if it does not.

   NO_LEADING_PERIOD, when nonzero, stops wildcards and bracket
   expressions from matching a period at the current position.  FLAGS
   holds the FNM_* bits tested below (FNM_EXTMATCH, FNM_FILE_NAME,
   FNM_NOESCAPE, FNM_LEADING_DIR, FNM_PERIOD).

   The function calls itself to try the possible expansions of a star
   and hands extended-match groups over to EXT.  */
31 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
32 int no_leading_period, int flags)
34 register const CHAR *p = pattern, *n = string;
37 # if WIDE_CHAR_VERSION
38 const char *collseq = (const char *)
39 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
41 const UCHAR *collseq = (const UCHAR *)
42 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
/* Consume the pattern one character at a time until its terminator. */
46 while ((c = *p++) != L('\0'))
48 int new_no_leading_period = 0;
54 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
58 res = EXT (c, p, n, string_end, no_leading_period,
66 else if (*n == L('/') && (flags & FNM_FILE_NAME))
68 else if (*n == L('.') && no_leading_period)
73 if (!(flags & FNM_NOESCAPE))
77 /* Trailing \ loses. */
81 if (n == string_end || FOLD ((UCHAR) *n) != c)
86 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
90 res = EXT (c, p, n, string_end, no_leading_period,
96 if (n != string_end && *n == L('.') && no_leading_period)
99 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
101 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
103 const CHAR *endp = END (p);
106 /* This is a pattern. Skip over it. */
114 /* A ? needs to match one character. */
116 /* There isn't another character; no match. */
118 else if (*n == L('/')
119 && __builtin_expect (flags & FNM_FILE_NAME, 0))
120 /* A slash does not match a wildcard under
124 /* One character of the string is consumed in matching
125 this ? wildcard, so *??? won't match if there are
126 less than three characters. */
132 /* The wildcard(s) is/are the last element of the pattern.
133 If the name is a file name and contains another slash
134 this means it cannot match, unless the FNM_LEADING_DIR
137 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
139 if (flags & FNM_FILE_NAME)
141 if (flags & FNM_LEADING_DIR)
145 if (MEMCHR (n, L('/'), string_end - n) == NULL)
156 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
162 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
163 && (c == L('@') || c == L('+') || c == L('!'))
166 int flags2 = ((flags & FNM_FILE_NAME)
167 ? flags : (flags & ~FNM_PERIOD));
168 int no_leading_period2 = no_leading_period;
/* Step P back onto the operator and retry the rest of the pattern at
   every remaining position; the leading-period restriction only
   applies to the first attempt. */
170 for (--p; n < endp; ++n, no_leading_period2 = 0)
171 if (FCT (p, n, string_end, no_leading_period2, flags2)
175 else if (c == L('/') && (flags & FNM_FILE_NAME))
177 while (n < string_end && *n != L('/'))
179 if (n < string_end && *n == L('/')
180 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
186 int flags2 = ((flags & FNM_FILE_NAME)
187 ? flags : (flags & ~FNM_PERIOD));
188 int no_leading_period2 = no_leading_period;
190 if (c == L('\\') && !(flags & FNM_NOESCAPE))
193 for (--p; n < endp; ++n, no_leading_period2 = 0)
194 if (FOLD ((UCHAR) *n) == c
195 && (FCT (p, n, string_end, no_leading_period2, flags2)
201 /* If we come here no match is possible with the wildcard. */
206 /* Nonzero if the sense of the character class is inverted. */
/* Cache the POSIXLY_CORRECT lookup: 1 when set, -1 when unset. */
211 if (posixly_correct == 0)
212 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
217 if (*n == L('.') && no_leading_period)
220 if (*n == L('/') && (flags & FNM_FILE_NAME))
221 /* `/' cannot be matched. */
/* A leading ! always negates the class; ^ does so only when
   POSIXLY_CORRECT is unset. */
224 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
228 fn = FOLD ((UCHAR) *n);
233 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
237 c = FOLD ((UCHAR) *p);
242 else if (c == L('[') && *p == L(':'))
244 /* Leave room for the null. */
245 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
247 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
250 const CHAR *startp = p;
254 if (c1 == CHAR_CLASS_MAX_LENGTH)
255 /* The name is too long and therefore the pattern
260 if (c == L(':') && p[1] == L(']'))
265 if (c < L('a') || c >= L('z'))
267 /* This cannot possibly be a character class name.
268 Match it as a normal range. */
277 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
278 wt = IS_CHAR_CLASS (str);
280 /* Invalid character class name. */
283 # if defined _LIBC && ! WIDE_CHAR_VERSION
284 /* The following code is glibc specific but does
285 a good job there of speeding up the code since
286 we can avoid the btowc() call. */
287 if (_ISCTYPE ((UCHAR) *n, wt))
290 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
294 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
295 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
296 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
297 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
298 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
299 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
300 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
301 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
302 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
303 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
304 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
305 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
311 else if (c == L('[') && *p == L('='))
315 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
316 const CHAR *startp = p;
328 if (c != L('=') || p[1] != L(']'))
338 if ((UCHAR) *n == str[0])
343 const int32_t *table;
344 # if WIDE_CHAR_VERSION
345 const int32_t *weights;
346 const int32_t *extra;
348 const unsigned char *weights;
349 const unsigned char *extra;
351 const int32_t *indirect;
353 const UCHAR *cp = (const UCHAR *) str;
355 /* This #include defines a local function! */
356 # if WIDE_CHAR_VERSION
357 # include <locale/weightwc.h>
359 # include <locale/weight.h>
362 # if WIDE_CHAR_VERSION
363 table = (const int32_t *)
364 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
365 weights = (const int32_t *)
366 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
367 extra = (const int32_t *)
368 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
369 indirect = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
372 table = (const int32_t *)
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
374 weights = (const unsigned char *)
375 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
376 extra = (const unsigned char *)
377 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
378 indirect = (const int32_t *)
379 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
385 /* We found a table entry. Now see whether the
386 character we are currently at has the same
387 equivalence class value. */
388 int len = weights[idx];
390 const UCHAR *np = (const UCHAR *) n;
392 idx2 = findidx (&np);
393 if (idx2 != 0 && len == weights[idx2])
398 && (weights[idx + 1 + cnt]
399 == weights[idx2 + 1 + cnt]))
411 else if (c == L('\0'))
412 /* [ (unterminated) loses. */
421 if (c == L('[') && *p == L('.'))
424 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
425 const CHAR *startp = p;
431 if (c == L('.') && p[1] == L(']'))
441 /* We have to handle the symbols differently in
442 ranges since then the collation sequence is
444 is_range = *p == L('-') && p[1] != L('\0');
448 /* There are no names defined in the collation
449 data. Therefore we only accept the trivial
450 names consisting of the character itself. */
454 if (!is_range && *n == startp[1])
463 const int32_t *symb_table;
464 # ifdef WIDE_CHAR_VERSION
468 # define str (startp + 1)
470 const unsigned char *extra;
476 # ifdef WIDE_CHAR_VERSION
477 /* We have to convert the name to a single-byte
478 string. This is possible since the names
479 consist of ASCII characters and the internal
480 representation is UCS4. */
481 for (strcnt = 0; strcnt < c1; ++strcnt)
482 str[strcnt] = startp[1 + strcnt];
486 _NL_CURRENT_WORD (LC_COLLATE,
487 _NL_COLLATE_SYMB_HASH_SIZEMB);
488 symb_table = (const int32_t *)
489 _NL_CURRENT (LC_COLLATE,
490 _NL_COLLATE_SYMB_TABLEMB);
491 extra = (const unsigned char *)
492 _NL_CURRENT (LC_COLLATE,
493 _NL_COLLATE_SYMB_EXTRAMB);
495 /* Locate the character in the hashing table. */
496 hash = elem_hash (str, c1);
499 elem = hash % table_size;
500 if (symb_table[2 * elem] != 0)
502 second = hash % (table_size - 2) + 1;
506 /* First compare the hashing value. */
507 if (symb_table[2 * elem] == hash
509 == extra[symb_table[2 * elem + 1]])
511 &extra[symb_table[2 * elem
515 /* Yep, this is the entry. */
516 idx = symb_table[2 * elem + 1];
517 idx += 1 + extra[idx];
524 while (symb_table[2 * elem] != 0);
527 if (symb_table[2 * elem] != 0)
529 /* Compare the byte sequence but only if
530 this is not part of a range. */
531 # ifdef WIDE_CHAR_VERSION
534 idx += 1 + extra[idx];
535 /* Adjust for the alignment. */
536 idx = (idx + 3) & ~3;
538 wextra = (int32_t *) &extra[idx + 4];
543 # ifdef WIDE_CHAR_VERSION
545 (int32_t) c1 < wextra[idx];
547 if (n[c1] != wextra[1 + c1])
550 if ((int32_t) c1 == wextra[idx])
553 for (c1 = 0; c1 < extra[idx]; ++c1)
554 if (n[c1] != extra[1 + c1])
557 if (c1 == extra[idx])
562 /* Get the collation sequence value. */
564 # ifdef WIDE_CHAR_VERSION
565 cold = wextra[1 + wextra[idx]];
567 /* Adjust for the alignment: round IDX up to a multiple of 4. */
568 idx += 1 + extra[idx];
569 idx = (idx + 3) & ~3;
570 cold = *((int32_t *) &extra[idx]);
577 /* No valid character. Match it as a
579 if (!is_range && *n == str[0])
596 /* We have to handle the symbols differently in
597 ranges since then the collation sequence is
599 is_range = (*p == L('-') && p[1] != L('\0')
602 if (!is_range && c == fn)
605 /* This is needed if we goto normal_bracket; from
606 outside of is_seqval's scope. */
607 #ifndef __UCLIBC__ /* this should be probably ifdef _LIBC*/
614 if (c == L('-') && *p != L(']'))
617 /* We have to find the collation sequence
618 value for C. Collation sequence is nothing
619 we can regularly access. The sequence
620 value is defined by the order in which the
621 definitions of the collation values for the
622 various characters appear in the source
623 file. A strange concept, nowhere
629 # ifdef WIDE_CHAR_VERSION
630 /* Search in the `names' array for the characters. */
631 fcollseq = __collseq_table_lookup (collseq, fn);
632 if (fcollseq == ~((uint32_t) 0))
633 /* XXX We don't know anything about the character
634 we are supposed to match. This means we are
636 goto range_not_matched;
641 lcollseq = __collseq_table_lookup (collseq, cold);
643 fcollseq = collseq[fn];
644 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
648 if (cend == L('[') && *p == L('.'))
651 _NL_CURRENT_WORD (LC_COLLATE,
653 const CHAR *startp = p;
659 if (c == L('.') && p[1] == L(']'))
671 /* There are no names defined in the
672 collation data. Therefore we only
673 accept the trivial names consisting
674 of the character itself. */
683 const int32_t *symb_table;
684 # ifdef WIDE_CHAR_VERSION
688 # define str (startp + 1)
690 const unsigned char *extra;
696 # ifdef WIDE_CHAR_VERSION
697 /* We have to convert the name to a single-byte
698 string. This is possible since the names
699 consist of ASCII characters and the internal
700 representation is UCS4. */
701 for (strcnt = 0; strcnt < c1; ++strcnt)
702 str[strcnt] = startp[1 + strcnt];
706 _NL_CURRENT_WORD (LC_COLLATE,
707 _NL_COLLATE_SYMB_HASH_SIZEMB);
708 symb_table = (const int32_t *)
709 _NL_CURRENT (LC_COLLATE,
710 _NL_COLLATE_SYMB_TABLEMB);
711 extra = (const unsigned char *)
712 _NL_CURRENT (LC_COLLATE,
713 _NL_COLLATE_SYMB_EXTRAMB);
715 /* Locate the character in the hashing
717 hash = elem_hash (str, c1);
720 elem = hash % table_size;
721 if (symb_table[2 * elem] != 0)
723 second = hash % (table_size - 2) + 1;
727 /* First compare the hashing value. */
728 if (symb_table[2 * elem] == hash
730 == extra[symb_table[2 * elem + 1]])
732 &extra[symb_table[2 * elem + 1]
735 /* Yep, this is the entry. */
736 idx = symb_table[2 * elem + 1];
737 idx += 1 + extra[idx];
744 while (symb_table[2 * elem] != 0);
747 if (symb_table[2 * elem] != 0)
749 /* Compare the byte sequence but only if
750 this is not part of a range. */
751 # ifdef WIDE_CHAR_VERSION
754 idx += 1 + extra[idx];
755 /* Adjust for the alignment: round IDX up to a multiple of 4. */
756 idx = (idx + 3) & ~3;
758 wextra = (int32_t *) &extra[idx + 4];
760 /* Get the collation sequence value. */
762 # ifdef WIDE_CHAR_VERSION
763 cend = wextra[1 + wextra[idx]];
765 /* Adjust for the alignment: round IDX up to a multiple of 4. */
766 idx += 1 + extra[idx];
767 idx = (idx + 3) & ~3;
768 cend = *((int32_t *) &extra[idx]);
771 else if (symb_table[2 * elem] != 0 && c1 == 1)
783 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
790 /* XXX It is not entirely clear to me how to handle
791 characters which are not mentioned in the
792 collation specification. */
794 # ifdef WIDE_CHAR_VERSION
795 lcollseq == 0xffffffff ||
797 lcollseq <= fcollseq)
799 /* We have to look at the upper bound. */
806 # ifdef WIDE_CHAR_VERSION
808 __collseq_table_lookup (collseq, cend);
809 if (hcollseq == ~((uint32_t) 0))
811 /* Hum, no information about the upper
812 bound. The matching succeeds if the
813 lower bound is matched exactly. */
814 if (lcollseq != fcollseq)
815 goto range_not_matched;
820 hcollseq = collseq[cend];
824 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
827 # ifdef WIDE_CHAR_VERSION
831 /* We use a boring value comparison of the character
832 values. This is better than comparing using
833 `strcoll' since the latter would have surprising
834 and sometimes fatal consequences. */
837 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
843 if (cold <= fn && fn <= cend)
860 /* Skip the rest of the [...] that already matched. */
867 /* [... (unterminated) loses. */
870 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
874 /* XXX 1003.2d11 is unclear if this is right. */
877 else if (c == L('[') && *p == L(':'))
880 const CHAR *startp = p;
885 if (++c1 == CHAR_CLASS_MAX_LENGTH)
888 if (*p == L(':') && p[1] == L(']'))
891 if (c < L('a') || c >= L('z'))
900 else if (c == L('[') && *p == L('='))
906 if (c != L('=') || p[1] != L(']'))
911 else if (c == L('[') && *p == L('.'))
920 if (*p == L('.') && p[1] == L(']'))
936 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
940 res = EXT (c, p, n, string_end, no_leading_period, flags);
947 if (NO_LEADING_PERIOD (flags))
949 if (n == string_end || c != (UCHAR) *n)
952 new_no_leading_period = 1;
958 if (n == string_end || c != FOLD ((UCHAR) *n))
962 no_leading_period = new_no_leading_period;
969 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
970 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
/* Scan forward from PATTERN through an extended-match group, skipping
   bracket expressions whole and recognizing nested group openers
   ( ?( *( +( @( !( ) as well as closing parentheses.
   NOTE(review): the return statements are not visible in this listing;
   presumably the result points just past the closing parenthesis of
   the group, or at an empty string for an invalid pattern -- confirm. */
979 END (const CHAR *pattern)
981 const CHAR *p = pattern;
985 /* This is an invalid pattern. */
987 else if (*p == L('['))
989 /* Handle brackets specially. First cache whether POSIXLY_CORRECT is set (1) or unset (-1). */
990 if (posixly_correct == 0)
991 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
993 /* Skip the not sign. We have to recognize it because of a possibly
995 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
997 /* A leading ']' is recognized as such. */
1000 /* Skip over all characters of the list. */
1001 while (*p != L(']'))
1002 if (*p++ == L('\0'))
1003 /* This is not a valid pattern. */
1006 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1007 || *p == L('!')) && p[1] == L('('))
1009 else if (*p == L(')'))
/* Match the text between STRING and STRING_END against an
   extended-match group.  OPT is the operator character that introduced
   the group; parsing starts at PATTERN + 1, so PATTERN is expected to
   point at the opening parenthesis (FCT checks for it before calling).

   The group is first split at its top-level | characters into a linked
   list of alternatives built by NEW_PATTERN with storage from alloca.
   Bracket expressions and nested groups are stepped over so that their
   own | and ) characters are not misread.  The alternatives are then
   tried, combined with the rest of the pattern, through recursive FCT
   calls.

   NOTE(review): the return statements and the case labels are missing
   from this listing; presumably the result is zero on a match, as for
   FCT -- confirm.
   NOTE(review): for the ? and @ operators NEW_PATTERN sizes every node
   from pattern_len, presumably so that the STRCAT call further down has
   room to append the rest of the pattern -- confirm.  All nodes live on
   the stack, so stack use grows with the pattern length. */
1018 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1019 int no_leading_period, int flags)
1025 struct patternlist *next;
1028 struct patternlist **lastp = &list;
1029 size_t pattern_len = STRLEN (pattern);
1033 /* Parse the pattern. Store the individual parts in the list. */
1035 for (startp = p = pattern + 1; level >= 0; ++p)
1037 /* This is an invalid pattern. */
1039 else if (*p == L('['))
1041 /* Handle brackets specially. First cache whether POSIXLY_CORRECT is set (1) or unset (-1). */
1042 if (posixly_correct == 0)
1043 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1045 /* Skip the not sign. We have to recognize it because of a possibly
1047 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1049 /* A leading ']' is recognized as such. */
1052 /* Skip over all characters of the list. */
1053 while (*p != L(']'))
1054 if (*p++ == L('\0'))
1055 /* This is not a valid pattern. */
1058 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1059 || *p == L('!')) && p[1] == L('('))
1060 /* Remember the nesting level. */
1062 else if (*p == L(')'))
1066 /* This means we found the end of the pattern. */
1067 #define NEW_PATTERN \
1068 struct patternlist *newp; \
1070 if (opt == L('?') || opt == L('@')) \
1071 newp = alloca (sizeof (struct patternlist) \
1072 + (pattern_len * sizeof (CHAR))); \
1074 newp = alloca (sizeof (struct patternlist) \
1075 + ((p - startp + 1) * sizeof (CHAR))); \
1076 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1077 newp->next = NULL; \
1083 else if (*p == L('|'))
1091 assert (list != NULL);
1092 assert (p[-1] == L(')'));
1098 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1105 for (rs = string; rs <= string_end; ++rs)
1106 /* First match the prefix with the current pattern with the
1108 if (FCT (list->str, string, rs, no_leading_period,
1109 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1110 /* This was successful. Now match the rest with the rest
1112 && (FCT (p, rs, string_end,
1115 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1116 flags & FNM_FILE_NAME
1117 ? flags : flags & ~FNM_PERIOD) == 0
1118 /* This didn't work. Try the whole pattern. */
1120 && FCT (pattern - 1, rs, string_end,
1123 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1125 flags & FNM_FILE_NAME
1126 ? flags : flags & ~FNM_PERIOD) == 0)))
1127 /* It worked. Signal success. */
1130 while ((list = list->next) != NULL);
1132 /* None of the patterns leads to a match. */
1136 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1142 /* I cannot believe it but `strcat' is actually acceptable
1143 here. Match the entire string with the prefix from the
1144 pattern list and the rest of the pattern following the
1146 if (FCT (STRCAT (list->str, p), string, string_end,
1148 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1149 /* It worked. Signal success. */
1151 while ((list = list->next) != NULL);
1153 /* None of the patterns leads to a match. */
1157 for (rs = string; rs <= string_end; ++rs)
1159 struct patternlist *runp;
1161 for (runp = list; runp != NULL; runp = runp->next)
1162 if (FCT (runp->str, string, rs, no_leading_period,
1163 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1166 /* If none of the patterns matched see whether the rest does. */
1168 && (FCT (p, rs, string_end,
1171 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1172 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1174 /* This is successful. */
1178 /* None of the patterns together with the rest of the pattern
1183 assert (! "Invalid extended matching operator");