diff --git a/format.c b/format.c
index da45ed0..f42ccc3 100644
--- a/format.c
+++ b/format.c
@@ -235,7 +235,7 @@ xs_str *sanitize(const char *content)
 
             if (valid_tags[i]) {
                 /* accepted tag: rebuild it with only the accepted elements */
-                xs *el = xs_regex_match(v, "(src|href|rel|class|target)=\"[^\"]*\"");
+                xs *el = xs_regex_select(v, "(src|href|rel|class|target)=\"[^\"]*\"");
                 xs *s3 = xs_join(el, " ");
 
                 s2 = xs_fmt("<%s%s%s%s>",
diff --git a/snac.c b/snac.c
index 06f7072..3f6d747 100644
--- a/snac.c
+++ b/snac.c
@@ -17,6 +17,7 @@
 #include "xs_time.h"
 #include "xs_glob.h"
 #include "xs_random.h"
+#include "xs_match.h"
 
 #include "snac.h"
 
diff --git a/xs_match.h b/xs_match.h
new file mode 100644
index 0000000..9f12c15
--- /dev/null
+++ b/xs_match.h
@@ -0,0 +1,90 @@
+/* copyright (c) 2022 - 2023 grunfink et al. / MIT license */
+
+#ifndef _XS_MATCH_H
+
+#define _XS_MATCH_H
+
+/* spec is very similar to shell file globbing:
+   an * matches any sequence of characters, even an empty one;
+   a ? matches any single character;
+   a | separates alternative specs;
+   a \ escapes the special character after it (\\ in a C literal);
+   any other char matches itself.
+   str must match one of the alternatives from start to end. */
+
+int xs_match(const char *str, const char *spec);
+
+#ifdef XS_IMPLEMENTATION
+
+static int xs_match_alt(const char *str, const char *spec)
+/* matches all of str against the first alternative in spec, i.e.
+   everything up to the first unescaped | (or the end of spec) */
+{
+    for (;;) {
+        char p = *spec;
+
+        if (p == '\0' || p == '|')
+            return *str == '\0';    /* alternative done: str must be, too */
+
+        if (p == '*') {
+            spec++;                 /* wildcard */
+
+            /* try the rest of the spec on every tail of str,
+               including the empty one, never stepping past its end */
+            for (;;) {
+                if (xs_match_alt(str, spec))
+                    return 1;
+
+                if (*str == '\0')
+                    return 0;
+
+                str++;
+            }
+        }
+
+        if (*str == '\0')
+            return 0;               /* spec wants a char, str has none */
+
+        if (p == '?') {
+            spec++;                 /* any character */
+            str++;
+            continue;
+        }
+
+        /* escaped char: compare it literally (a trailing \ is itself) */
+        if (p == '\\' && spec[1] != '\0')
+            p = *++spec;
+
+        if (p != *str)
+            return 0;
+
+        spec++;                     /* matched 1 char */
+        str++;
+    }
+}
+
+
+int xs_match(const char *str, const char *spec)
+/* returns 1 if str matches any of the alternatives in spec, 0 otherwise */
+{
+    for (;;) {
+        if (xs_match_alt(str, spec))
+            return 1;
+
+        /* not matched; move to the next unescaped alternative separator */
+        while (*spec != '\0' && *spec != '|') {
+            if (*spec == '\\' && spec[1] != '\0')
+                spec++;             /* escaped char */
+            spec++;
+        }
+
+        if (*spec == '\0')
+            return 0;               /* no alternatives left */
+
+        spec++;                     /* try next alternative */
+    }
+}
+
+#endif /* XS_IMPLEMENTATION */
+
+#endif /* _XS_MATCH_H */
diff --git a/xs_regex.h b/xs_regex.h
index 3425661..e86b78e 100644
--- a/xs_regex.h
+++ b/xs_regex.h
@@ -6,8 +6,8 @@
 
 xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
 #define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)
-xs_list *xs_regex_match_n(const char *str, const char *rx, int count);
-#define xs_regex_match(str, rx) xs_regex_match_n(str, rx, XS_ALL)
+xs_list *xs_regex_select_n(const char *str, const char *rx, int count);
+#define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL)
 xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count);
 #define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)
 #define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)
@@ -55,8 +55,8 @@ xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
 }
 
 
-xs_list *xs_regex_match_n(const char *str, const char *rx, int count)
-/* returns a list with upto count matches */
+xs_list *xs_regex_select_n(const char *str, const char *rx, int count)
+/* selects up to count matches and returns them as a list */
 {
     xs_list *list = xs_list_new();
     xs *split = NULL;
diff --git a/xs_version.h b/xs_version.h
index 5fd4466..f6c3412 100644
--- a/xs_version.h
+++ b/xs_version.h
@@ -1 +1 @@
-/* 0ca53ca2b1c34efa95639d2a0f5bf4bd32f8958c */
+/* 06767a70773865042a70680aef50f7ecb077681a */