mirror of https://github.com/openSUSE/libsolv.git
321 lines
7.2 KiB
C
321 lines
7.2 KiB
C
/*
|
|
* An implementation of what I call the "Sea of Stars" algorithm for
|
|
* POSIX fnmatch(). The basic idea is that we factor the pattern into
|
|
* a head component (which we match first and can reject without ever
|
|
* measuring the length of the string), an optional tail component
|
|
* (which only exists if the pattern contains at least one star), and
|
|
* an optional "sea of stars", a set of star-separated components
|
|
* between the head and tail. After the head and tail matches have
|
|
* been removed from the input string, the components in the "sea of
|
|
* stars" are matched sequentially by searching for their first
|
|
* occurrence past the end of the previous match.
|
|
*
|
|
* - Rich Felker, April 2012
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include <fnmatch.h>
|
|
#include <stdlib.h>
|
|
#include <wchar.h>
|
|
#include <wctype.h>
|
|
|
|
#define END 0
|
|
#define UNMATCHABLE -2
|
|
#define BRACKET -3
|
|
#define QUESTION -4
|
|
#define STAR -5
|
|
|
|
static int str_next(const char *str, size_t n, size_t *step)
|
|
{
|
|
if (!n) {
|
|
*step = 0;
|
|
return 0;
|
|
}
|
|
if (str[0] >= 128U) {
|
|
wchar_t wc;
|
|
int k = mbtowc(&wc, str, n);
|
|
if (k<0) {
|
|
*step = 1;
|
|
return -1;
|
|
}
|
|
*step = k;
|
|
return wc;
|
|
}
|
|
*step = 1;
|
|
return str[0];
|
|
}
|
|
|
|
static int pat_next(const char *pat, size_t m, size_t *step, int flags)
|
|
{
|
|
int esc = 0;
|
|
if (!m || !*pat) {
|
|
*step = 0;
|
|
return END;
|
|
}
|
|
*step = 1;
|
|
if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) {
|
|
*step = 2;
|
|
pat++;
|
|
esc = 1;
|
|
goto escaped;
|
|
}
|
|
if (pat[0]=='[') {
|
|
size_t k = 1;
|
|
if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
|
|
if (k<m) if (pat[k] == ']') k++;
|
|
for (; k<m && pat[k] && pat[k]!=']'; k++) {
|
|
if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
|
|
int z = pat[k+1];
|
|
k+=2;
|
|
if (k<m && pat[k]) k++;
|
|
while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
|
|
if (k==m || !pat[k]) break;
|
|
}
|
|
}
|
|
if (k==m || !pat[k]) {
|
|
*step = 1;
|
|
return '[';
|
|
}
|
|
*step = k+1;
|
|
return BRACKET;
|
|
}
|
|
if (pat[0] == '*')
|
|
return STAR;
|
|
if (pat[0] == '?')
|
|
return QUESTION;
|
|
escaped:
|
|
if (pat[0] >= 128U) {
|
|
wchar_t wc;
|
|
int k = mbtowc(&wc, pat, m);
|
|
if (k<0) {
|
|
*step = 0;
|
|
return UNMATCHABLE;
|
|
}
|
|
*step = k + esc;
|
|
return wc;
|
|
}
|
|
return pat[0];
|
|
}
|
|
|
|
static int casefold(int k)
|
|
{
|
|
int c = towupper(k);
|
|
return c == k ? towlower(k) : c;
|
|
}
|
|
|
|
static int match_bracket(const char *p, int k, int kfold)
|
|
{
|
|
wchar_t wc;
|
|
int inv = 0;
|
|
p++;
|
|
if (*p=='^' || *p=='!') {
|
|
inv = 1;
|
|
p++;
|
|
}
|
|
if (*p==']') {
|
|
if (k==']') return !inv;
|
|
p++;
|
|
} else if (*p=='-') {
|
|
if (k=='-') return !inv;
|
|
p++;
|
|
}
|
|
wc = p[-1];
|
|
for (; *p != ']'; p++) {
|
|
if (p[0]=='-' && p[1]!=']') {
|
|
wchar_t wc2;
|
|
int l = mbtowc(&wc2, p+1, 4);
|
|
if (l < 0) return 0;
|
|
if (wc <= wc2)
|
|
if ((unsigned)k-wc <= wc2-wc ||
|
|
(unsigned)kfold-wc <= wc2-wc)
|
|
return !inv;
|
|
p += l-1;
|
|
continue;
|
|
}
|
|
if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
|
|
const char *p0 = p+2;
|
|
int z = p[1];
|
|
p+=3;
|
|
while (p[-1]!=z || p[0]!=']') p++;
|
|
if (z == ':' && p-1-p0 < 16) {
|
|
char buf[16];
|
|
memcpy(buf, p0, p-1-p0);
|
|
buf[p-1-p0] = 0;
|
|
if (iswctype(k, wctype(buf)) ||
|
|
iswctype(kfold, wctype(buf)))
|
|
return !inv;
|
|
}
|
|
continue;
|
|
}
|
|
if (*p < 128U) {
|
|
wc = (unsigned char)*p;
|
|
} else {
|
|
int l = mbtowc(&wc, p, 4);
|
|
if (l < 0) return 0;
|
|
p += l-1;
|
|
}
|
|
if (wc==k || wc==kfold) return !inv;
|
|
}
|
|
return inv;
|
|
}
|
|
|
|
static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
|
|
{
|
|
const char *p, *ptail, *endpat;
|
|
const char *s, *stail, *endstr;
|
|
size_t pinc, sinc, tailcnt=0;
|
|
int c, k, kfold;
|
|
|
|
if (flags & FNM_PERIOD) {
|
|
if (*str == '.' && *pat != '.')
|
|
return FNM_NOMATCH;
|
|
}
|
|
for (;;) {
|
|
switch ((c = pat_next(pat, m, &pinc, flags))) {
|
|
case UNMATCHABLE:
|
|
return FNM_NOMATCH;
|
|
case STAR:
|
|
pat++;
|
|
m--;
|
|
break;
|
|
default:
|
|
k = str_next(str, n, &sinc);
|
|
if (k <= 0)
|
|
return (c==END) ? 0 : FNM_NOMATCH;
|
|
str += sinc;
|
|
n -= sinc;
|
|
kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
|
|
if (c == BRACKET) {
|
|
if (!match_bracket(pat, k, kfold))
|
|
return FNM_NOMATCH;
|
|
} else if (c != QUESTION && k != c && kfold != c) {
|
|
return FNM_NOMATCH;
|
|
}
|
|
pat+=pinc;
|
|
m-=pinc;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
/* Compute real pat length if it was initially unknown/-1 */
|
|
m = strnlen(pat, m);
|
|
endpat = pat + m;
|
|
|
|
/* Find the last * in pat and count chars needed after it */
|
|
for (p=ptail=pat; p<endpat; p+=pinc) {
|
|
switch (pat_next(p, endpat-p, &pinc, flags)) {
|
|
case UNMATCHABLE:
|
|
return FNM_NOMATCH;
|
|
case STAR:
|
|
tailcnt=0;
|
|
ptail = p+1;
|
|
break;
|
|
default:
|
|
tailcnt++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Past this point we need not check for UNMATCHABLE in pat,
|
|
* because all of pat has already been parsed once. */
|
|
|
|
/* Compute real str length if it was initially unknown/-1 */
|
|
n = strnlen(str, n);
|
|
endstr = str + n;
|
|
if (n < tailcnt) return FNM_NOMATCH;
|
|
|
|
/* Find the final tailcnt chars of str, accounting for UTF-8.
|
|
* On illegal sequences we may get it wrong, but in that case
|
|
* we necessarily have a matching failure anyway. */
|
|
for (s=endstr; s>str && tailcnt; tailcnt--) {
|
|
if (s[-1] < 128U || MB_CUR_MAX==1) s--;
|
|
else while ((unsigned char)*--s-0x80U<0x40 && s>str);
|
|
}
|
|
if (tailcnt) return FNM_NOMATCH;
|
|
stail = s;
|
|
|
|
/* Check that the pat and str tails match */
|
|
p = ptail;
|
|
for (;;) {
|
|
c = pat_next(p, endpat-p, &pinc, flags);
|
|
p += pinc;
|
|
if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
|
|
if (c != END) return FNM_NOMATCH;
|
|
break;
|
|
}
|
|
s += sinc;
|
|
kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
|
|
if (c == BRACKET) {
|
|
if (!match_bracket(p-pinc, k, kfold))
|
|
return FNM_NOMATCH;
|
|
} else if (c != QUESTION && k != c && kfold != c) {
|
|
return FNM_NOMATCH;
|
|
}
|
|
}
|
|
|
|
/* We're all done with the tails now, so throw them out */
|
|
endstr = stail;
|
|
endpat = ptail;
|
|
|
|
/* Match pattern components until there are none left */
|
|
while (pat<endpat) {
|
|
p = pat;
|
|
s = str;
|
|
for (;;) {
|
|
c = pat_next(p, endpat-p, &pinc, flags);
|
|
p += pinc;
|
|
/* Encountering * completes/commits a component */
|
|
if (c == STAR) {
|
|
pat = p;
|
|
str = s;
|
|
break;
|
|
}
|
|
k = str_next(s, endstr-s, &sinc);
|
|
if (!k)
|
|
return FNM_NOMATCH;
|
|
kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
|
|
if (c == BRACKET) {
|
|
if (!match_bracket(p-pinc, k, kfold))
|
|
break;
|
|
} else if (c != QUESTION && k != c && kfold != c) {
|
|
break;
|
|
}
|
|
s += sinc;
|
|
}
|
|
if (c == STAR) continue;
|
|
/* If we failed, advance str, by 1 char if it's a valid
|
|
* char, or past all invalid bytes otherwise. */
|
|
k = str_next(str, endstr-str, &sinc);
|
|
if (k > 0) str += sinc;
|
|
else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int fnmatch(const char *pat, const char *str, int flags)
|
|
{
|
|
const char *s, *p;
|
|
size_t inc;
|
|
int c;
|
|
if (flags & FNM_PATHNAME) for (;;) {
|
|
for (s=str; *s && *s!='/'; s++);
|
|
for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
|
|
if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR)))
|
|
return FNM_NOMATCH;
|
|
if (fnmatch_internal(pat, p-pat, str, s-str, flags))
|
|
return FNM_NOMATCH;
|
|
if (!c) return 0;
|
|
str = s+1;
|
|
pat = p+inc;
|
|
} else if (flags & FNM_LEADING_DIR) {
|
|
for (s=str; *s; s++) {
|
|
if (*s != '/') continue;
|
|
if (!fnmatch_internal(pat, -1, str, s-str, flags))
|
|
return 0;
|
|
}
|
|
}
|
|
return fnmatch_internal(pat, -1, str, -1, flags);
|
|
}
|