From acf3cdcf80da7c3443e202a02d4b626c13e9e8dd Mon Sep 17 00:00:00 2001
From: default
Date: Fri, 17 Nov 2023 03:51:04 +0100
Subject: [PATCH] Backport from xs.

---
Review notes (this section is not part of the commit message):

 * xs_hex_enc() now emits digits from a lookup table instead of calling
   snprintf() once per byte; the table is declared const.
 * The surrogate-pair arithmetic of the JSON lexer moves to xs_unicode.h
   as xs_is_surrogate(), xs_surrogate_dec() and xs_surrogate_enc().
 * xs_surrogate_dec() ADDS the 0x10000 offset to the combined 20-bit
   value, as the lexer code it replaces did. OR-ing the offset in loses
   it whenever bit 6 of the high surrogate is set (D840 DC00 would
   decode to U+10000 instead of U+20000).
 * xs_is_surrogate() accepts the whole D800..DFFF range, i.e. both
   halves of a pair; its comment now says so.

 xs.h         |  7 +++++--
 xs_json.h    | 12 ++++--------
 xs_unicode.h | 29 +++++++++++++++++++++++++++++
 xs_version.h |  2 +-
 4 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/xs.h b/xs.h
index 7b85dcb..c0857bc 100644
--- a/xs.h
+++ b/xs.h
@@ -1180,6 +1180,8 @@ void *xs_memmem(const char *haystack, int h_size, const char *needle, int n_size
 
 /** hex **/
 
+static const char xs_hex_digits[] = "0123456789abcdef";
+
 xs_str *xs_hex_enc(const xs_val *data, int size)
 /* returns an hexdump of data */
 {
@@ -1190,8 +1192,9 @@ xs_str *xs_hex_enc(const xs_val *data, int size)
     p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1));
 
     for (n = 0; n < size; n++) {
-        snprintf(p, 3, "%02x", (unsigned char)data[n]);
-        p += 2;
+        *p++ = xs_hex_digits[*data >> 4 & 0xf];
+        *p++ = xs_hex_digits[*data & 0xf];
+        data++;
     }
 
     *p = '\0';
diff --git a/xs_json.h b/xs_json.h
index e9dc052..03f7903 100644
--- a/xs_json.h
+++ b/xs_json.h
@@ -248,24 +248,20 @@ static xs_val *_xs_json_load_lexer(FILE *f, js_type *t)
                         break;
                     }
 
-                    if (cp >= 0xd800 && cp <= 0xdfff) {
-                        /* it's a surrogate pair */
-                        cp = (cp & 0x3ff) << 10;
-
+                    if (xs_is_surrogate(cp)) {
                         /* \u must follow */
                         if (fgetc(f) != '\\' || fgetc(f) != 'u') {
                             *t = JS_ERROR;
                             break;
                         }
 
-                        unsigned int i;
-                        if (fscanf(f, "%04x", &i) != 1) {
+                        unsigned int p2;
+                        if (fscanf(f, "%04x", &p2) != 1) {
                             *t = JS_ERROR;
                             break;
                         }
 
-                        cp |= (i & 0x3ff);
-                        cp += 0x10000;
+                        cp = xs_surrogate_dec(cp, p2);
                     }
 
                     /* replace dangerous control codes with their visual representations */
diff --git a/xs_unicode.h b/xs_unicode.h
index c7d6190..f5880f0 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -8,6 +8,9 @@
  xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
  unsigned int xs_utf8_dec(char **str);
  int xs_unicode_width(unsigned int cpoint);
+ int xs_is_surrogate(unsigned int cpoint);
+ unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2);
+ unsigned int xs_surrogate_enc(unsigned int cpoint);
  unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
  unsigned int *_xs_unicode_lower_search(unsigned int cpoint);
  #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint))
@@ -138,6 +141,32 @@ int xs_unicode_width(unsigned int cpoint)
 }
 
 
+/** surrogate pairs **/
+
+int xs_is_surrogate(unsigned int cpoint)
+/* checks if cpoint lies in the surrogate range (either half of a pair) */
+{
+    return cpoint >= 0xd800 && cpoint <= 0xdfff;
+}
+
+
+unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2)
+/* "decodes" a surrogate pair (p1 first, p2 second) into a codepoint */
+{
+    return 0x10000 + (((p1 & 0x3ff) << 10) | (p2 & 0x3ff));
+}
+
+
+unsigned int xs_surrogate_enc(unsigned int cpoint)
+/* "encodes" a codepoint into a surrogate pair (p1 in the upper 16 bits) */
+{
+    unsigned int p1 = 0xd7c0 + (cpoint >> 10);
+    unsigned int p2 = 0xdc00 + (cpoint & 0x3ff);
+
+    return (p1 << 16) | p2;
+}
+
+
 #ifdef _XS_UNICODE_TBL_H
 
 /* include xs_unicode_tbl.h before this one to use these functions */
diff --git a/xs_version.h b/xs_version.h
index d888d29..42dc7d2 100644
--- a/xs_version.h
+++ b/xs_version.h
@@ -1 +1 @@
-/* 40d63c59610c642d1c8b2e2b94bbf5cdde69ad6a */
+/* 0932615dfe85e5d8544c4b2052eb66f3a430eb8c */