mirror of
https://codeberg.org/grunfink/snac2.git
synced 2024-11-14 17:45:04 +00:00
Backport from xs.
This commit is contained in:
parent
6c2ca0d40a
commit
ca2e0fcd89
2 changed files with 75 additions and 26 deletions
99
xs_unicode.h
99
xs_unicode.h
|
@ -5,42 +5,91 @@
|
|||
#define _XS_UNICODE_H
|
||||
|
||||
/* encodes a Unicode codepoint to utf-8 into str */
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
|
||||
/* decodes a utf-8 char inside str into cpoint and returns the next position */
char *xs_utf8_dec(const char *str, unsigned int *cpoint);
|
||||
|
||||
|
||||
#ifdef XS_IMPLEMENTATION
|
||||
|
||||
/** utf-8 **/
|
||||
|
||||
char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
/* encodes a Unicode codepoint to utf-8 into buf (1 to 4 bytes, not
   NUL-terminated) and returns the position just past the last byte
   written. Codepoints above U+10FFFF cannot be represented in utf-8
   and are encoded as U+FFFD (replacement character) instead */
{
    unsigned char *p = (unsigned char *)buf;

    /* out-of-range values would otherwise produce a malformed lead byte */
    if (cpoint > 0x10ffff)
        cpoint = 0xfffd;

    if (cpoint < 0x80) /* 1 byte char */
        *p++ = cpoint & 0xff;
    else {
        /* write the lead byte (plus the extra continuation bytes of the
           longer forms); the trailing continuation bytes are shared below */
        if (cpoint < 0x800) /* 2 byte char */
            *p++ = 0xc0 | (cpoint >> 6);
        else {
            if (cpoint < 0x10000) /* 3 byte char */
                *p++ = 0xe0 | (cpoint >> 12);
            else { /* 4 byte char */
                *p++ = 0xf0 | (cpoint >> 18);
                *p++ = 0x80 | ((cpoint >> 12) & 0x3f);
            }

            /* second-to-last continuation byte (3 and 4 byte chars) */
            *p++ = 0x80 | ((cpoint >> 6) & 0x3f);
        }

        /* last continuation byte (all multibyte chars) */
        *p++ = 0x80 | (cpoint & 0x3f);
    }

    return (char *)p;
}
|
||||
|
||||
|
||||
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
/* encodes a Unicode codepoint to utf-8 into str; returns whatever
   xs_append_m() returns for str */
{
    char tmp[4], *p;

    /* encode into the scratch buffer; p ends up just past the last byte */
    p = _xs_utf8_enc(tmp, cpoint);

    /* p - tmp is the number of bytes that were written */
    return xs_append_m(str, tmp, p - tmp);
}
|
||||
|
||||
|
||||
char *xs_utf8_dec(const char *str, unsigned int *cpoint)
/* decodes a utf-8 char inside str into cpoint and returns the next
   position. If the lead byte is not a valid utf-8 start byte, or an
   expected continuation byte is missing, cpoint is set to U+FFFD
   (replacement character); in the latter case the returned position
   points to the offending byte */
{
    unsigned char *p = (unsigned char *)str;
    int c = *p++;
    int cb = 0; /* number of continuation bytes still expected */

    if ((c & 0x80) == 0) { /* 1 byte char */
        *cpoint = c;
    }
    else
    if ((c & 0xe0) == 0xc0) { /* 2 byte char */
        *cpoint = (c & 0x1f) << 6;
        cb = 1;
    }
    else
    if ((c & 0xf0) == 0xe0) { /* 3 byte char */
        *cpoint = (c & 0x0f) << 12;
        cb = 2;
    }
    else
    if ((c & 0xf8) == 0xf0) { /* 4 byte char */
        *cpoint = (c & 0x07) << 18;
        cb = 3;
    }
    else {
        /* stray continuation byte or invalid lead byte: never leave
           cpoint unset */
        *cpoint = 0xfffd;
    }

    /* process the continuation bytes */
    while (cb--) {
        if ((*p & 0xc0) == 0x80)
            *cpoint |= (*p++ & 0x3f) << (cb * 6);
        else {
            /* truncated sequence; don't consume the unexpected byte */
            *cpoint = 0xfffd;
            break;
        }
    }

    return (char *)p;
}
|
||||
|
||||
|
||||
#endif /* XS_IMPLEMENTATION */
|
||||
|
||||
#endif /* _XS_UNICODE_H */
|
||||
|
|
|
@ -1 +1 @@
|
|||
/* 1948fa3c5f0df994170cd38b9144b99734b071e6 */
|
||||
/* 3588cbb7859917f1c5965254f8a53c3349c773ea */
|
||||
|
|
Loading…
Reference in a new issue