Backport from xs.

This commit is contained in:
default 2023-11-17 03:51:04 +01:00
parent 57ab8df007
commit acf3cdcf80
4 changed files with 39 additions and 11 deletions

7
xs.h
View file

@ -1180,6 +1180,8 @@ void *xs_memmem(const char *haystack, int h_size, const char *needle, int n_size
/** hex **/
static char xs_hex_digits[] = "0123456789abcdef";
xs_str *xs_hex_enc(const xs_val *data, int size)
/* returns a hexdump of data */
{
@ -1190,8 +1192,9 @@ xs_str *xs_hex_enc(const xs_val *data, int size)
p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1));
for (n = 0; n < size; n++) {
snprintf(p, 3, "%02x", (unsigned char)data[n]);
p += 2;
*p++ = xs_hex_digits[*data >> 4 & 0xf];
*p++ = xs_hex_digits[*data & 0xf];
data++;
}
*p = '\0';

View file

@ -248,24 +248,20 @@ static xs_val *_xs_json_load_lexer(FILE *f, js_type *t)
break;
}
if (cp >= 0xd800 && cp <= 0xdfff) {
/* it's a surrogate pair */
cp = (cp & 0x3ff) << 10;
if (xs_is_surrogate(cp)) {
/* \u must follow */
if (fgetc(f) != '\\' || fgetc(f) != 'u') {
*t = JS_ERROR;
break;
}
unsigned int i;
if (fscanf(f, "%04x", &i) != 1) {
unsigned int p2;
if (fscanf(f, "%04x", &p2) != 1) {
*t = JS_ERROR;
break;
}
cp |= (i & 0x3ff);
cp += 0x10000;
cp = xs_surrogate_dec(cp, p2);
}
/* replace dangerous control codes with their visual representations */

View file

@ -8,6 +8,9 @@
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
unsigned int xs_utf8_dec(char **str);
int xs_unicode_width(unsigned int cpoint);
/* non-zero if cpoint lies in the surrogate range 0xd800-0xdfff */
int xs_is_surrogate(unsigned int cpoint);
/* combines the surrogate pair p1, p2 into a single codepoint */
unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2);
/* returns the surrogate pair for cpoint packed in one value:
   p1 in the upper 16 bits, p2 in the lower 16 bits */
unsigned int xs_surrogate_enc(unsigned int cpoint);
unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
unsigned int *_xs_unicode_lower_search(unsigned int cpoint);
/* 1 if _xs_unicode_upper_search() returns a non-null pointer, 0 otherwise */
#define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint))
@ -138,6 +141,32 @@ int xs_unicode_width(unsigned int cpoint)
}
/** surrogate pairs **/
int xs_is_surrogate(unsigned int cpoint)
/* returns 1 if cpoint falls inside the UTF-16 surrogate range
   (0xd800 to 0xdfff, both ends included), 0 otherwise. Note that
   the range spans both the leading and the trailing halves */
{
    /* unsigned wrap-around sends anything below 0xd800 far above 0x7ff,
       so one comparison covers both bounds */
    return (cpoint - 0xd800u) <= 0x7ffu;
}
unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2)
/* "decodes" a surrogate pair into a codepoint. p1 is the leading (high)
   element and p2 the trailing (low) one; only the lower 10 bits of each
   are used, so the result is always in the range 0x10000 to 0x10ffff */
{
    /* the 20-bit payload must be ADDED to 0x10000, not OR-ed: whenever
       (p1 & 0x3ff) >= 0x40 the payload already has bit 16 set and an OR
       would silently lose the carry (d840 dc00 must yield U+20000) */
    unsigned int payload = ((p1 & 0x3ff) << 10) | (p2 & 0x3ff);

    return 0x10000 + payload;
}
unsigned int xs_surrogate_enc(unsigned int cpoint)
/* "encodes" a Unicode codepoint as a surrogate pair, returned packed
   in a single value: leading element in the upper 16 bits, trailing
   element in the lower 16 bits */
{
    /* 0xd7c0 is 0xd800 - (0x10000 >> 10): the 0x10000 offset is folded
       into the base of the leading element */
    unsigned int lead  = (cpoint >> 10) + 0xd7c0;
    unsigned int trail = (cpoint & 0x3ff) + 0xdc00;

    return trail | (lead << 16);
}
#ifdef _XS_UNICODE_TBL_H
/* include xs_unicode_tbl.h before this one to use these functions */

View file

@ -1 +1 @@
/* 40d63c59610c642d1c8b2e2b94bbf5cdde69ad6a */
/* 0932615dfe85e5d8544c4b2052eb66f3a430eb8c */