/*
 * Patch summary (xs.h, xs_regex.h, xs_unicode.h, xs_version.h).
 *
 * xs.h
 *   - Adds xs_str_new_sz(), which copies sz bytes from a memory block into
 *     a freshly allocated string and appends the terminating NUL.
 *   - Splits xs_strip_chars_i() into xs_lstrip_chars_i() (strip from the
 *     start) and xs_rstrip_chars_i() (strip from the end);
 *     xs_strip_chars_i() becomes lstrip(rstrip(str)).
 *   - xs_split_n() builds each slice with xs_str_new_sz() and appends it
 *     with xs_list_append(), replacing the two-step "append raw bytes, then
 *     insert the asciiz" sequence.
 *   - xs_data_get() takes the byte count from xs_data_size(value) instead of
 *     computing it inline.
 *
 * xs_regex.h
 *   - xs_regex_split_n() builds the leading part and the matched separator
 *     with xs_str_new_sz() + xs_list_append().
 *
 * xs_unicode.h
 *   - Adds the static comparator int_range_cmp(): the key is one unsigned
 *     int, each element starts with a [low, high] pair.
 *   - xs_unicode_width() replaces its linear scan with bsearch() over
 *     3-int records and returns the third int of the matching record.
 *   - Adds the definition of xs_unicode_is_alpha(): bsearch() over 2-int
 *     records of xs_unicode_alpha_table.
 *
 * xs_version.h
 *   - Replaces the hash stored in the version comment.
 *
 * NOTE(review): the old xs_unicode_width() returned 0 when the whole table
 * was scanned without a match; the new code returns 1 whenever bsearch()
 * finds nothing. Confirm this behavior change is intended.
 * NOTE(review): the XS_ASSERT_TYPE(str, XSTYPE_STRING) check is removed and
 * not re-added to any of the three strip functions. Confirm.
 * NOTE(review): xs_str_new_sz() does not validate mem or sz; callers here
 * pass pointer differences and regmatch offsets. Verify they cannot be
 * negative.
 * NOTE(review): xs_unicode_alpha_table is not defined in this patch;
 * presumably it comes from the table header guarded by _XS_UNICODE_TBL_H.
 * Verify.
 * NOTE(review): whitespace inside the hunks may not match the target files
 * byte-for-byte; check with "git apply --check" before applying.
 */
diff --git a/xs.h b/xs.h
index 6ac70bf..40dd706 100644
--- a/xs.h
+++ b/xs.h
@@ -59,6 +59,7 @@ xs_val *xs_insert_m(xs_val *data, int offset, const char *mem, int size);
 #define xs_append_m(data, mem, size) xs_insert_m(data, xs_size(data) - 1, mem, size)
 
 xs_str *xs_str_new(const char *str);
+xs_str *xs_str_new_sz(const char *mem, int sz);
 xs_str *xs_str_wrap_i(const char *prefix, xs_str *str, const char *suffix);
 #define xs_str_prepend_i(str, prefix) xs_str_wrap_i(prefix, str, NULL)
 #define xs_str_cat(str, suffix) xs_str_wrap_i(NULL, str, suffix)
@@ -72,6 +73,8 @@ int xs_starts_and_ends(const char *prefix, const char *str, const char *suffix);
 #define xs_startswith(str, prefix) xs_starts_and_ends(prefix, str, NULL)
 #define xs_endswith(str, suffix) xs_starts_and_ends(NULL, str, suffix)
 xs_str *xs_crop_i(xs_str *str, int start, int end);
+xs_str *xs_lstrip_chars_i(xs_str *str, const char *chars);
+xs_str *xs_rstrip_chars_i(xs_str *str, const char *chars);
 xs_str *xs_strip_chars_i(xs_str *str, const char *chars);
 #define xs_strip_i(str) xs_strip_chars_i(str, " \r\n\t\v\f")
 xs_str *xs_tolower_i(xs_str *str);
@@ -424,6 +427,17 @@ xs_str *xs_str_new(const char *str)
 }
 
 
+xs_str *xs_str_new_sz(const char *mem, int sz)
+/* creates a new string from a memory block, adding an asciiz */
+{
+    xs_str *s = xs_realloc(NULL, _xs_blk_size(sz + 1));
+    memcpy(s, mem, sz);
+    s[sz] = '\0';
+
+    return s;
+}
+
+
 xs_str *xs_str_wrap_i(const char *prefix, xs_str *str, const char *suffix)
 /* wraps str with prefix and suffix */
 {
@@ -546,29 +560,39 @@ xs_str *xs_crop_i(xs_str *str, int start, int end)
 }
 
 
-xs_str *xs_strip_chars_i(xs_str *str, const char *chars)
-/* strips the string of chars from the start and the end */
+xs_str *xs_lstrip_chars_i(xs_str *str, const char *chars)
+/* strips all chars from the start of str */
+{
+    int n;
+
+    for (n = 0; str[n] && strchr(chars, str[n]); n++);
+
+    if (n)
+        str = xs_collapse(str, 0, n);
+
+    return str;
+}
+
+
+xs_str *xs_rstrip_chars_i(xs_str *str, const char *chars)
+/* strips all chars from the end of str */
 {
-    XS_ASSERT_TYPE(str, XSTYPE_STRING);
-
     int n;
 
-    /* strip first from the end */
     for (n = strlen(str); n > 0 && strchr(chars, str[n - 1]); n--);
     str[n] = '\0';
 
-    if (str[0]) {
-        /* now strip from the beginning */
-        for (n = 0; str[n] && strchr(chars, str[n]); n++);
-
-        if (n)
-            str = xs_collapse(str, 0, n);
-    }
-
     return str;
 }
 
 
+xs_str *xs_strip_chars_i(xs_str *str, const char *chars)
+/* strips the string of chars from the start and the end */
+{
+    return xs_lstrip_chars_i(xs_rstrip_chars_i(str, chars), chars);
+}
+
+
 xs_str *xs_tolower_i(xs_str *str)
 /* convert to lowercase */
 {
@@ -859,11 +883,9 @@ xs_list *xs_split_n(const char *str, const char *sep, int times)
     list = xs_list_new();
 
     while (times > 0 && (ss = strstr(str, sep)) != NULL) {
-        /* add the first part (without the asciiz) */
-        list = xs_list_append_m(list, str, ss - str);
-
-        /* add the asciiz */
-        list = xs_insert_m(list, xs_size(list) - 1, "", 1);
+        /* create a new string with this slice and add it to the list */
+        xs *s = xs_str_new_sz(str, ss - str);
+        list = xs_list_append(list, s);
 
         /* skip past the separator */
         str = ss + sz;
@@ -1131,8 +1153,7 @@ int xs_data_size(const xs_data *value)
 void xs_data_get(const xs_data *value, void *data)
 /* copies the raw data stored inside value into data */
 {
-    int size = _xs_get_24b(value + 1) - 4;
-    memcpy(data, &value[4], size);
+    memcpy(data, &value[4], xs_data_size(value));
 }
 
 
diff --git a/xs_regex.h b/xs_regex.h
index 7e1c80f..3425661 100644
--- a/xs_regex.h
+++ b/xs_regex.h
@@ -33,12 +33,12 @@ xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
     while (count > 0 && !regexec(&re, (p = str + offset),
                 1, &rm, offset > 0 ? REG_NOTBOL : 0)) {
         /* add first the leading part of the string */
-        list = xs_list_append_m(list, p, rm.rm_so);
-        list = xs_insert_m(list, xs_size(list) - 1, "", 1);
+        xs *s1 = xs_str_new_sz(p, rm.rm_so);
+        list = xs_list_append(list, s1);
 
         /* add now the matched text as the separator */
-        list = xs_list_append_m(list, p + rm.rm_so, rm.rm_eo - rm.rm_so);
-        list = xs_insert_m(list, xs_size(list) - 1, "", 1);
+        xs *s2 = xs_str_new_sz(p + rm.rm_so, rm.rm_eo - rm.rm_so);
+        list = xs_list_append(list, s2);
 
         /* move forward */
         offset += rm.rm_eo;
diff --git a/xs_unicode.h b/xs_unicode.h
index 35cd9f7..c7d6190 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -16,6 +16,7 @@ unsigned int xs_unicode_to_lower(unsigned int cpoint);
 
 int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac);
 int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint);
+
 int xs_unicode_is_alpha(unsigned int cpoint);
 
 #ifdef XS_IMPLEMENTATION
@@ -101,6 +102,15 @@ unsigned int xs_utf8_dec(char **str)
 }
 
 
+static int int_range_cmp(const void *p1, const void *p2)
+{
+    const unsigned int *a = p1;
+    const unsigned int *b = p2;
+
+    return *a < b[0] ? -1 : *a > b[1] ? 1 : 0;
+}
+
+
 /* intentionally dead simple */
 
 static unsigned int xs_unicode_width_table[] = {
@@ -119,20 +129,12 @@ static unsigned int xs_unicode_width_table[] = {
 int xs_unicode_width(unsigned int cpoint)
 /* returns the width in columns of a Unicode codepoint (somewhat simplified) */
 {
-    unsigned int *p = xs_unicode_width_table;
-    unsigned int *e = p + sizeof(xs_unicode_width_table) / sizeof(unsigned int);
+    unsigned int *r = bsearch(&cpoint, xs_unicode_width_table,
+                        sizeof(xs_unicode_width_table) / (sizeof(unsigned int) * 3),
+                        sizeof(unsigned int) * 3,
+                        int_range_cmp);
 
-    while (p < e) {
-        if (cpoint < p[0])
-            return 1;
-
-        if (cpoint >= p[0] && cpoint <= p[1])
-            return p[2];
-
-        p += 3;
-    }
-
-    return 0;
+    return r ? r[2] : 1;
 }
 
 
@@ -232,6 +234,18 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint)
 }
 
 
+int xs_unicode_is_alpha(unsigned int cpoint)
+/* checks if a codepoint is an alpha (i.e. a letter) */
+{
+    unsigned int *r = bsearch(&cpoint, xs_unicode_alpha_table,
+                        sizeof(xs_unicode_alpha_table) / (sizeof(unsigned int) * 2),
+                        sizeof(unsigned int) * 2,
+                        int_range_cmp);
+
+    return !!r;
+}
+
+
 #endif /* _XS_UNICODE_TBL_H */
 
 #endif /* XS_IMPLEMENTATION */
diff --git a/xs_version.h b/xs_version.h
index 800a008..a05f5bb 100644
--- a/xs_version.h
+++ b/xs_version.h
@@ -1 +1 @@
-/* fdd04f1862e0d8bdebb7b438798914643895d43f */
+/* 5bf06243b37eec60e48e53d87d4d147d01ad9924 */